From 3e27ecd2c66f9ece6f75f1aab3970f70fa475ebd Mon Sep 17 00:00:00 2001 From: Danilo Burbano Date: Fri, 9 Feb 2024 17:16:16 -0500 Subject: [PATCH] [SPARKNLP-988] Updating EntityRuler documentation --- docs/en/annotator_entries/EntityRuler.md | 6 +----- .../nlp/annotators/er/AhoCorasickAutomaton.scala | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/en/annotator_entries/EntityRuler.md b/docs/en/annotator_entries/EntityRuler.md index 95c369bdae181d..07602ac7c44314 100644 --- a/docs/en/annotator_entries/EntityRuler.md +++ b/docs/en/annotator_entries/EntityRuler.md @@ -36,8 +36,6 @@ There are multiple ways and formats to set the extraction resource. It is possib set as the "format" field in the `option` parameter map and depending on the file type, additional parameters might need to be set. -To enable regex extraction, `setEnablePatternRegex(true)` needs to be called. - If the file is in a JSON format, then the rule definitions need to be given in a list with the fields "id", "label" and "patterns": ``` @@ -110,8 +108,7 @@ entityRuler = EntityRulerApproach() \ "patterns.csv", ReadAs.TEXT, {"format": "csv", "delimiter": "\\|"} - ) \ - .setEnablePatternRegex(True) + ) pipeline = Pipeline().setStages([ documentAssembler, tokenizer, @@ -163,7 +160,6 @@ val entityRuler = new EntityRulerApproach() ReadAs.TEXT, {"format": "csv", "delimiter": "|")} ) - .setEnablePatternRegex(true) val pipeline = new Pipeline().setStages(Array( documentAssembler, diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/er/AhoCorasickAutomaton.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/er/AhoCorasickAutomaton.scala index c4f2fe05b61b0a..45c08ae781423d 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/er/AhoCorasickAutomaton.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/er/AhoCorasickAutomaton.scala @@ -203,7 +203,7 @@ class AhoCorasickAutomaton( private def getAlphabetErrorMessage(char: Char): String = { val workshopURL = "https://github.com/JohnSnowLabs/spark-nlp/" val alphabetExample = - "blob/master/examples/python/annotation/text/english/entity-ruler/EntityRuler_Alphabet.ipynb" + "blob/master/examples/python/training/english/entity-ruler/EntityRuler_Alphabet.ipynb" val errorMessage: String = s"""Char $char not found in the alphabet. Your data could have unusual characters not found |in your document's language, which requires setting up a custom alphabet.