Skip to content

Commit

Permalink
Merge pull request #63 from JohnSnowLabs/remove_regex_ner
Browse files Browse the repository at this point in the history
Remove Regex NER
  • Loading branch information
saif-ellafi authored Dec 16, 2017
2 parents 0840585 + 504f2db commit 91d8acb
Show file tree
Hide file tree
Showing 10 changed files with 0 additions and 348 deletions.
30 changes: 0 additions & 30 deletions python/sparknlp/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,36 +234,6 @@ class PerceptronModel(JavaModel, JavaMLWritable, JavaMLReadable, AnnotatorProper
name = "PerceptronModel"


class NERRegexApproach(JavaEstimator, JavaMLWritable, JavaMLReadable, AnnotatorProperties):
    """Estimator wrapper for the JVM ``NERRegexApproach`` annotator.

    Exposes a single ``corpusPath`` param and yields a ``NERRegexModel``
    (see ``_create_model``) once fitted.
    """

    # Path of the corpus handed to the JVM annotator. The "__default" sentinel is
    # interpreted on the Scala side -- NOTE(review): presumably "use the bundled
    # corpus"; confirm against the Scala implementation.
    corpusPath = Param(Params._dummy(),
                       "corpusPath",
                       "corpus path",
                       typeConverter=TypeConverters.toString)

    @keyword_only
    def __init__(self):
        """Create the backing Java object and register the default ``corpusPath``."""
        super(NERRegexApproach, self).__init__()
        self._java_obj = self._new_java_obj("com.johnsnowlabs.nlp.annotators.ner.regex.NERRegexApproach", self.uid)
        kwargs = self._input_kwargs
        self._setDefault(corpusPath="__default")
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, corpusPath="__default"):
        """Set params from keyword arguments.

        ``@keyword_only`` is required here: it is what stores the arguments of
        *this* call in ``self._input_kwargs``. Without it, the method would
        re-apply the kwargs captured by the most recent decorated call
        (``__init__``) and silently drop the ``corpusPath`` passed by the caller.

        :param corpusPath: corpus path to set on this estimator.
        :return: this instance, to allow chaining.
        """
        kwargs = self._input_kwargs
        return self._set(**kwargs)

    def setCorpusPath(self, value):
        """Set ``corpusPath`` to ``value`` and return this instance for chaining."""
        self._set(corpusPath=value)
        return self

    def _create_model(self, java_model):
        """Wrap the fitted Java model in its Python counterpart."""
        return NERRegexModel(java_model)


class NERRegexModel(JavaModel, JavaMLWritable, JavaMLReadable, AnnotatorProperties):
    """Python handle for the Java model produced by fitting ``NERRegexApproach``."""

    name = "NERRegexModel"


class SentenceDetectorModel(AnnotatorTransformer):

useAbbreviations = Param(Params._dummy(),
Expand Down
26 changes: 0 additions & 26 deletions python/test/annotators.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,32 +135,6 @@ def runTest(self):
pos_tagger.transform(tokenized).show()


class RegexNERApproachTestSpec(unittest.TestCase):
    """Smoke test: fit the regex NER annotator and run it after the
    document -> sentence -> token stages. Makes no assertions on the output;
    it only checks that the pipeline executes.
    """

    def setUp(self):
        self.data = SparkContextForNER.data

    def runTest(self):
        # Stage construction (same order as before: assembler, detector, tokenizer, NER).
        assembler_stage = (DocumentAssembler()
                           .setInputCol("_c0")
                           .setOutputCol("document"))
        sentence_stage = (SentenceDetectorModel()
                          .setInputCols(["document"])
                          .setOutputCol("sentence"))
        token_stage = (RegexTokenizer()
                       .setInputCols(["sentence"])
                       .setOutputCol("token"))
        ner_approach = (NERRegexApproach()
                        .setInputCols(["sentence"])
                        .setOutputCol("NER"))

        # The estimator is fitted on the raw input data, before any transform runs.
        ner_model = ner_approach.fit(self.data)

        # Feed the raw data through each stage in turn.
        tokenized = token_stage.transform(
            sentence_stage.transform(
                assembler_stage.transform(self.data)))
        ner_model.transform(tokenized).select("NER").take(10)


class PragmaticSBDTestSpec(unittest.TestCase):

def setUp(self):
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

11 changes: 0 additions & 11 deletions src/test/scala/com/johnsnowlabs/nlp/AnnotatorBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package com.johnsnowlabs.nlp

import com.johnsnowlabs.nlp.annotators._
import com.johnsnowlabs.nlp.annotators.ner.crf.{NerCrfApproach, NerCrfModel}
import com.johnsnowlabs.nlp.annotators.ner.regex.NERRegexApproach
import com.johnsnowlabs.nlp.annotators.parser.dep.DependencyParser
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronApproach
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetectorModel
Expand Down Expand Up @@ -124,16 +123,6 @@ object AnnotatorBuilder extends FlatSpec { this: Suite =>
spellChecker.fit(withFullNormalizer(dataset)).transform(withFullNormalizer(dataset))
}

/** Fits and applies the regex-based NER annotator on sentence-detected input.
  *
  * The input is first run through `withFullPragmaticSentenceDetector`; a
  * `NERRegexApproach` reading the "sentence" column and writing "ner" is then
  * fitted on, and applied to, that same prepared dataset.
  *
  * @param dataset raw input rows
  * @return the prepared dataset with the annotator's "ner" output column added
  */
def withNERTagger(dataset: Dataset[Row]): Dataset[Row] = {
  // Prepare the sentence-annotated input once and reuse it for fit and transform,
  // instead of building the same upstream pipeline twice (withDependencyParser
  // hoists its input into a val the same way).
  val sentenced = withFullPragmaticSentenceDetector(dataset)
  new NERRegexApproach()
    .setInputCols(Array("sentence"))
    .setOutputCol("ner")
    .setCorpusPath("/ner-corpus/dict.txt")
    .fit(sentenced)
    .transform(sentenced)
}

def withDependencyParser(dataset: Dataset[Row]): Dataset[Row] = {
val df = withFullPOSTagger(withTokenizer(dataset))
new DependencyParser()
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

0 comments on commit 91d8acb

Please sign in to comment.