-
Notifications
You must be signed in to change notification settings - Fork 717
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Relocating public examples back to the main repository (#13292)
* First init of relocating examples
* Add Scala examples
* Rename offline notebook
* Fix a bad import
* remove outdated example
* Create text, image, and audio categories
* Example codes for configs should be language agnostic
- Loading branch information
1 parent
2f41ea6
commit c3e0cd8
Showing
138 changed files
with
59,853 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package com.johnsnowlabs.nlp; | ||
|
||
import com.johnsnowlabs.nlp.annotators.LemmatizerModel; | ||
import com.johnsnowlabs.nlp.annotators.Tokenizer; | ||
import com.johnsnowlabs.nlp.embeddings.EmbeddingsHelper; | ||
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline; | ||
import org.apache.spark.ml.Pipeline; | ||
import org.apache.spark.ml.PipelineModel; | ||
import org.apache.spark.ml.PipelineStage; | ||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Encoders; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
|
||
import java.util.LinkedList; | ||
|
||
public class AnnotationExamples { | ||
|
||
public static void main(String args[]) { | ||
|
||
DocumentAssembler document = new DocumentAssembler(); | ||
document.setInputCol("text"); | ||
document.setOutputCol("document"); | ||
document.setCleanupMode("disabled"); | ||
|
||
Tokenizer tokenizer = new Tokenizer(); | ||
tokenizer.setInputCols(new String[] {"document"}); | ||
tokenizer.setOutputCol("token"); | ||
|
||
Pipeline pipeline = new Pipeline(); | ||
pipeline.setStages(new PipelineStage[] {document, tokenizer}); | ||
|
||
SparkSession spark = com.johnsnowlabs.nlp.SparkNLP.start(); | ||
|
||
LinkedList<String> text = new java.util.LinkedList<String>(); | ||
|
||
text.add("Peter is a very good person"); | ||
|
||
Dataset<Row> data = spark.createDataset(text, Encoders.STRING()).toDF("text"); | ||
|
||
PipelineModel pipelineModel = pipeline.fit(data); | ||
|
||
Dataset<Row> transformed = pipelineModel.transform(data); | ||
transformed.show(); | ||
|
||
PretrainedPipeline pretrained = new PretrainedPipeline("explain_document_dl"); | ||
pretrained.transform(data).show(); | ||
|
||
LemmatizerModel lemmatizer = (LemmatizerModel) LemmatizerModel.pretrained("lemma_antbnc"); | ||
lemmatizer.setInputCols(new String[] {"token"}); | ||
lemmatizer.setOutputCol("lemma"); | ||
|
||
lemmatizer.transform(transformed).show(); | ||
|
||
LightPipeline lightPipeline = new LightPipeline(pipelineModel, true); | ||
|
||
java.util.Map<String, java.util.List<String>> result = lightPipeline.annotateJava("Peter is a very good person."); | ||
|
||
System.out.println(result.get("token")); | ||
|
||
java.util.ArrayList<String> list = new java.util.ArrayList<String>(); | ||
list.add("Peter is a good person."); | ||
list.add("Roy lives in Germany."); | ||
|
||
System.out.println(lightPipeline.annotateJava(list)); | ||
|
||
EmbeddingsHelper.load( | ||
"./random_embeddings_dim4.txt", | ||
spark, | ||
"TEXT", | ||
"random", | ||
4, | ||
false); | ||
|
||
System.out.println("\nFinished testing Spark NLP on JAVA"); | ||
|
||
} | ||
} |
2,212 changes: 2,212 additions & 0 deletions
2,212
example/java/annotation/random_embeddings_dim4.txt
Large diffs are not rendered by default.
Oops, something went wrong.
994 changes: 994 additions & 0 deletions
994
...nnotation/audio/asr-wav2vec2/Automatic_Speech_Recognition_Wav2Vec2_(Wav2Vec2ForCTC).ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.