-
Notifications
You must be signed in to change notification settings - Fork 717
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Relocating public examples back to the main repository (#13292)
* First init of relocating examples * Add Scala examples * Rename offline notebook * Fix a bad import * remove outdated example * Create text, image, and audio categories * Example codes for configs should be language agnostic
- Loading branch information
1 parent
8e9f1bb
commit 9756a7e
Showing
141 changed files
with
59,860 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
# Spark NLP Examples | ||
|
||
We have moved all the examples to a repository dedicated to showcasing Spark NLP use cases! | ||
Under construction | ||
|
||
Let's go! [spark-nlp-workshop](https://github.com/JohnSnowLabs/spark-nlp-workshop) | ||
Required maintained examples | ||
|
||
- Python | ||
- Scala | ||
- Java | ||
- Docker |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package com.johnsnowlabs.nlp; | ||
|
||
import com.johnsnowlabs.nlp.annotators.LemmatizerModel; | ||
import com.johnsnowlabs.nlp.annotators.Tokenizer; | ||
import com.johnsnowlabs.nlp.embeddings.EmbeddingsHelper; | ||
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline; | ||
import org.apache.spark.ml.Pipeline; | ||
import org.apache.spark.ml.PipelineModel; | ||
import org.apache.spark.ml.PipelineStage; | ||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Encoders; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
|
||
import java.util.LinkedList; | ||
|
||
public class AnnotationExamples { | ||
|
||
public static void main(String args[]) { | ||
|
||
DocumentAssembler document = new DocumentAssembler(); | ||
document.setInputCol("text"); | ||
document.setOutputCol("document"); | ||
document.setCleanupMode("disabled"); | ||
|
||
Tokenizer tokenizer = new Tokenizer(); | ||
tokenizer.setInputCols(new String[] {"document"}); | ||
tokenizer.setOutputCol("token"); | ||
|
||
Pipeline pipeline = new Pipeline(); | ||
pipeline.setStages(new PipelineStage[] {document, tokenizer}); | ||
|
||
SparkSession spark = com.johnsnowlabs.nlp.SparkNLP.start(); | ||
|
||
LinkedList<String> text = new java.util.LinkedList<String>(); | ||
|
||
text.add("Peter is a very good person"); | ||
|
||
Dataset<Row> data = spark.createDataset(text, Encoders.STRING()).toDF("text"); | ||
|
||
PipelineModel pipelineModel = pipeline.fit(data); | ||
|
||
Dataset<Row> transformed = pipelineModel.transform(data); | ||
transformed.show(); | ||
|
||
PretrainedPipeline pretrained = new PretrainedPipeline("explain_document_dl"); | ||
pretrained.transform(data).show(); | ||
|
||
LemmatizerModel lemmatizer = (LemmatizerModel) LemmatizerModel.pretrained("lemma_antbnc"); | ||
lemmatizer.setInputCols(new String[] {"token"}); | ||
lemmatizer.setOutputCol("lemma"); | ||
|
||
lemmatizer.transform(transformed).show(); | ||
|
||
LightPipeline lightPipeline = new LightPipeline(pipelineModel, true); | ||
|
||
java.util.Map<String, java.util.List<String>> result = lightPipeline.annotateJava("Peter is a very good person."); | ||
|
||
System.out.println(result.get("token")); | ||
|
||
java.util.ArrayList<String> list = new java.util.ArrayList<String>(); | ||
list.add("Peter is a good person."); | ||
list.add("Roy lives in Germany."); | ||
|
||
System.out.println(lightPipeline.annotateJava(list)); | ||
|
||
EmbeddingsHelper.load( | ||
"./random_embeddings_dim4.txt", | ||
spark, | ||
"TEXT", | ||
"random", | ||
4, | ||
false); | ||
|
||
System.out.println("\nFinished testing Spark NLP on JAVA"); | ||
|
||
} | ||
} |
2,212 changes: 2,212 additions & 0 deletions
2,212
example/java/annotation/random_embeddings_dim4.txt
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.