Skip to content

Commit

Permalink
Relocating public examples back to the main repository (#13292)
Browse files Browse the repository at this point in the history
* First init of relocating examples

* Add Scala examples

* Rename offline notebook

* Fix a bad import

* remove outdated example

* Create text, image, and audio categories

* Example code for configs should be language-agnostic
  • Loading branch information
maziyarpanahi authored and Jose J. Martinez committed Feb 17, 2023
1 parent 2f41ea6 commit c3e0cd8
Show file tree
Hide file tree
Showing 138 changed files with 59,853 additions and 0 deletions.
78 changes: 78 additions & 0 deletions example/java/annotation/AnnotationExamples.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.johnsnowlabs.nlp;

import com.johnsnowlabs.nlp.annotators.LemmatizerModel;
import com.johnsnowlabs.nlp.annotators.Tokenizer;
import com.johnsnowlabs.nlp.embeddings.EmbeddingsHelper;
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Runnable walkthrough of several Spark NLP entry points from Java.
 *
 * <p>In order, {@link #main(String[])}:
 * <ol>
 *   <li>builds a two-stage pipeline ({@code DocumentAssembler} then {@code Tokenizer}),
 *       fits it on a one-row dataset and shows the transformed result;</li>
 *   <li>runs the {@code "explain_document_dl"} pretrained pipeline on the same dataset;</li>
 *   <li>applies the {@code "lemma_antbnc"} pretrained lemmatizer to the tokenized output;</li>
 *   <li>wraps the fitted model in a {@code LightPipeline} and annotates a single string
 *       and a list of strings;</li>
 *   <li>loads embeddings from {@code ./random_embeddings_dim4.txt} through
 *       {@code EmbeddingsHelper}.</li>
 * </ol>
 */
public class AnnotationExamples {

    /**
     * Runs every example step and prints the results to standard output.
     *
     * <p>The Spark session is stopped in a {@code finally} block so that it is released
     * even when one of the steps throws.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {

        // Stage 1: turn the raw "text" column into "document" annotations.
        DocumentAssembler document = new DocumentAssembler();
        document.setInputCol("text");
        document.setOutputCol("document");
        document.setCleanupMode("disabled");

        // Stage 2: split each document into "token" annotations.
        Tokenizer tokenizer = new Tokenizer();
        tokenizer.setInputCols(new String[] {"document"});
        tokenizer.setOutputCol("token");

        Pipeline pipeline = new Pipeline();
        pipeline.setStages(new PipelineStage[] {document, tokenizer});

        SparkSession spark = com.johnsnowlabs.nlp.SparkNLP.start();
        try {
            List<String> text = new ArrayList<>();
            text.add("Peter is a very good person");

            // One-row DataFrame whose single column is named "text",
            // matching the assembler's input column.
            Dataset<Row> data = spark.createDataset(text, Encoders.STRING()).toDF("text");

            PipelineModel pipelineModel = pipeline.fit(data);

            Dataset<Row> transformed = pipelineModel.transform(data);
            transformed.show();

            PretrainedPipeline pretrained = new PretrainedPipeline("explain_document_dl");
            pretrained.transform(data).show();

            // The lemmatizer reads the "token" column produced by the pipeline above,
            // so it is applied to the already-transformed dataset.
            LemmatizerModel lemmatizer =
                    (LemmatizerModel) LemmatizerModel.pretrained("lemma_antbnc");
            lemmatizer.setInputCols(new String[] {"token"});
            lemmatizer.setOutputCol("lemma");

            lemmatizer.transform(transformed).show();

            LightPipeline lightPipeline = new LightPipeline(pipelineModel, true);

            Map<String, List<String>> result =
                    lightPipeline.annotateJava("Peter is a very good person.");

            System.out.println(result.get("token"));

            // Deliberately declared as ArrayList (not List): the original example passes
            // an ArrayList to annotateJava, and the declared type is kept so the same
            // overload is selected.
            ArrayList<String> list = new ArrayList<>();
            list.add("Peter is a good person.");
            list.add("Roy lives in Germany.");

            System.out.println(lightPipeline.annotateJava(list));

            // Path is relative to the working directory; the literal arguments are
            // format "TEXT", reference name "random", dimension 4, case-insensitive (false).
            EmbeddingsHelper.load(
                    "./random_embeddings_dim4.txt",
                    spark,
                    "TEXT",
                    "random",
                    4,
                    false);

            System.out.println("\nFinished testing Spark NLP on JAVA");
        } finally {
            // Release the Spark session whether the example finished or failed.
            spark.stop();
        }
    }
}
2,212 changes: 2,212 additions & 0 deletions example/java/annotation/random_embeddings_dim4.txt

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading

0 comments on commit c3e0cd8

Please sign in to comment.