Update licensed docs #13405

Merged · 28 commits · Feb 15, 2023

Commits
37fd779  Added links to Python API (dcecchini, Dec 13, 2022)
e582a9d  Updated licensed utility and helpers docs (dcecchini, Dec 16, 2022)
7dd31a3  Added ModelTracer to utility and helper page (dcecchini, Dec 19, 2022)
7e03c19  Added AnnotationMerger page (dcecchini, Dec 20, 2022)
82ecf5e  Added BertSentenceChunkEmbeddings page (dcecchini, Dec 20, 2022)
d2ec1fa  Added ChunkMapper and ChunkConverter pages (dcecchini, Dec 20, 2022)
5e32cf8  Added DateNormalizer page (dcecchini, Dec 20, 2022)
e40b2d8  Added ChunkMapperFilterer page (dcecchini, Dec 21, 2022)
fc81ae8  Added ChunkMapperFilterer page (dcecchini, Dec 21, 2022)
16ddb57  Added Doc2ChunkInternal page (dcecchini, Dec 21, 2022)
2ce2937  Added DocumentHashCoder page (dcecchini, Dec 21, 2022)
9cf64ac  Added ZeroShotNerModel page (dcecchini, Dec 21, 2022)
c393529  Added ZeroShotRelationExtractionModel page (dcecchini, Dec 21, 2022)
02afabd  Fix Python API link for CoNLL dataset page (dcecchini, Dec 21, 2022)
dc4a8cc  Added ChunkMapperFilterer page (dcecchini, Dec 22, 2022)
d97aaeb  Added ChunkSentenceSplitter page (dcecchini, Dec 22, 2022)
733859e  Added ChunkSentenceSplitter page (dcecchini, Dec 22, 2022)
85d9dee  Added AssertionChunkConverter page (dcecchini, Dec 22, 2022)
76b9800  Add Python API link to license annotator template (dcecchini, Dec 26, 2022)
8e0299f  Added Risk Adjustments Score Calculation page (dcecchini, Dec 26, 2022)
04a516d  Merge branch 'master' into update-licensed-docs (C-K-Loan, Jan 6, 2023)
4fa2e2d  Merge branch 'master' into update-licensed-docs (maziyarpanahi, Jan 12, 2023)
55dcd31  Added missing parameters on Python code (dcecchini, Jan 18, 2023)
f9c240c  Added .vscode to gitignore (dcecchini, Jan 18, 2023)
5d4666e  Updated licensed annotators docs (dcecchini, Jan 23, 2023)
bfa32b5  Merge branch 'master' into update-licensed-docs (dcecchini, Jan 24, 2023)
f060821  Added ChunkEntityResolver page (dcecchini, Jan 31, 2023)
02d5cc3  Merge branch 'update-licensed-docs' of github.com:JohnSnowLabs/spark-… (dcecchini, Jan 31, 2023)
3 changes: 3 additions & 0 deletions .gitignore
@@ -331,3 +331,6 @@ src/*/resources/*.classes
.bsp/sbt.json
python/docs/_build/**
python/docs/reference/_autosummary/**

# MS Visual Studio Code
**/.vscode/
6 changes: 6 additions & 0 deletions docs/en/licensed_annotator_entries/AssertionChunkConverter.md
@@ -10,6 +10,12 @@ model

This annotator creates a `CHUNK` column with metadata useful for training an Assertion Status Detection model (see [AssertionDL](https://nlp.johnsnowlabs.com/docs/en/licensed_annotators#assertiondl)).

In some cases, creating the chunk column from token indices can be error-prone and lead to a loss of data when training assertion status models.

The `AssertionChunkConverter` annotator uses both the begin and end indices of the tokens as input to add more robust metadata to the chunk column, improving the reliability of the indices and avoiding loss of data.

> *NOTE*: Chunk begin and end indices in the assertion status model training dataframe can be populated using the new version of the ALAB module.
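
A minimal usage sketch is shown below. The column names are illustrative and the setters follow the documented `AssertionChunkConverter` API; verify them against your Spark NLP for Healthcare release.

```python
# Sketch: configure AssertionChunkConverter from character-level chunk
# indices. Assumes sparknlp_jsl is installed and the dataframe already
# has token annotations plus "target", "char_begin", and "char_end"
# columns (illustrative names).
from sparknlp_jsl.annotator import AssertionChunkConverter

converter = (
    AssertionChunkConverter()
    .setInputCols("tokens")                 # token annotations
    .setChunkTextCol("target")              # text of the target chunk
    .setChunkBeginCol("char_begin")         # character-level begin index
    .setChunkEndCol("char_end")             # character-level end index
    .setOutputTokenBeginCol("token_begin")  # produced token-level begin index
    .setOutputTokenEndCol("token_end")      # produced token-level end index
    .setOutputCol("chunk")                  # CHUNK column used for training
)
```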

{%- endcapture -%}

{%- capture model_input_anno -%}
5 changes: 4 additions & 1 deletion docs/en/licensed_annotator_entries/NerConverterInternal.md
@@ -9,7 +9,10 @@ model
{%- capture model_description -%}
Converts an IOB or IOB2 representation of NER to a user-friendly one
by associating the tokens of recognized entities with their labels.
Chunks with no associated entity (tagged "O") are filtered out.

This licensed annotator extends the open-source [NerConverter](https://nlp.johnsnowlabs.com/docs/en/annotators#nerconverter) with the following additional parameters: `blackList`, `greedyMode`, `threshold`, and `ignoreStopWords`.

See also [Inside–outside–beginning (tagging)](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)) for more information.
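
As a hedged sketch of the licensed parameters, assuming `sparknlp_jsl` is installed and upstream stages already produce `sentence`, `token`, and `ner` columns (the `blackList` and `threshold` values are illustrative):

```python
from sparknlp_jsl.annotator import NerConverterInternal

ner_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setBlackList(["TEST"])  # drop chunks labeled TEST
    .setThreshold(0.8)       # keep only chunks with confidence >= 0.8
)
```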
{%- endcapture -%}

10 changes: 8 additions & 2 deletions docs/en/licensed_annotator_entries/RENerChunksFilter.md
@@ -7,8 +7,14 @@ model
{%- endcapture -%}

{%- capture model_description -%}
Filters entities' dependency relations.

The annotator filters desired relation pairs (defined by the parameter `relationPairs`) and stores them in the output column.

Filtering the possible relations can be useful to perform additional analysis for a specific use case (e.g., checking adverse drug reactions and drug relations), which can be the input for further analysis using a pretrained `RelationExtractionDLModel`.

For example, the [ner_clinical](https://nlp.johnsnowlabs.com/2021/03/31/ner_clinical_en.html) NER model can identify `PROBLEM`, `TEST`, and `TREATMENT` entities. With the `RENerChunksFilter`, one can keep only the relations between `PROBLEM` and `TREATMENT` entities, removing relations between other entity pairs, to further analyze the associations between clinical problems and treatments.
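
A minimal sketch of that filtering step, assuming `sparknlp_jsl` is installed and upstream stages produce `ner_chunks` and `dependencies` columns (the pair syntax follows the documented `relationPairs` convention):

```python
from sparknlp_jsl.annotator import RENerChunksFilter

re_ner_chunks_filter = (
    RENerChunksFilter()
    .setInputCols(["ner_chunks", "dependencies"])
    .setOutputCol("re_ner_chunks")
    .setRelationPairs(["problem-treatment"])  # keep only PROBLEM-TREATMENT pairs
)
```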

{%- endcapture -%}

{%- capture model_input_anno -%}
14 changes: 9 additions & 5 deletions docs/en/licensed_annotator_entries/RelationExtraction.md
@@ -11,11 +11,11 @@ model
{%- endcapture -%}

{%- capture model_description -%}
Extracts and classifies instances of relations between named entities.

For available pretrained models, please see the [Models Hub](https://nlp.johnsnowlabs.com/models?task=Relation+Extraction).
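
For instance, a pretrained model can be loaded as in the following sketch, assuming `sparknlp_jsl` is installed and licensed (`re_clinical` is one of the models listed on the Models Hub; the input columns are illustrative):

```python
from sparknlp_jsl.annotator import RelationExtractionModel

re_model = (
    RelationExtractionModel.pretrained("re_clinical", "en", "clinical/models")
    .setInputCols(["embeddings", "pos_tags", "ner_chunks", "dependencies"])
    .setOutputCol("relations")
)
```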

{%- endcapture -%}

{%- capture model_input_anno -%}
@@ -220,9 +220,13 @@ val result = pipeline.fit(data).transform(data)
{%- endcapture -%}

{%- capture approach_description -%}
Trains a TensorFlow model for relation extraction.

To train a custom relation extraction model, you first need to create a TensorFlow graph using either the `TfGraphBuilder` annotator or the `tf_graph` module. Then, set the path to the TensorFlow graph using the method `.setModelFile("path/to/tensorflow_graph.pb")`.

If the parameter `relationDirectionCol` is set, the model will be trained using the direction information (see the parameter description for details). Otherwise, the relations between entities are treated as undirected.

After training the model (using the `.fit()` method), the resulting object is of class `RelationExtractionModel`.
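
A hedged training sketch, assuming `sparknlp_jsl` is installed, a TensorFlow graph was generated beforehand, and `train_df` holds the annotated training data (the column names, the `direction` column, and the epoch count are illustrative):

```python
from sparknlp_jsl.annotator import RelationExtractionApproach

re_approach = (
    RelationExtractionApproach()
    .setInputCols(["embeddings", "pos_tags", "train_ner_chunks", "dependencies"])
    .setOutputCol("relations")
    .setLabelColumn("rel_label")                  # gold relation labels
    .setModelFile("path/to/tensorflow_graph.pb")  # graph created beforehand
    .setRelationDirectionCol("direction")         # optional direction column
    .setEpochsNumber(50)
)

re_model = re_approach.fit(train_df)  # returns a RelationExtractionModel
```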
{%- endcapture -%}

{%- capture approach_input_anno -%}
115 changes: 113 additions & 2 deletions python/sparknlp/annotator/spell_check/context_spell_checker.py
@@ -92,6 +92,10 @@ class ContextSpellCheckerApproach(AnnotatorApproach):
correction.
configProtoBytes
ConfigProto from tensorflow, serialized into byte array.
maxSentLen
Maximum length for a sentence - internal use during training.
graphFolder
Folder path that contains the external graph files.

References
----------
@@ -226,6 +230,16 @@ class ContextSpellCheckerApproach(AnnotatorApproach):
"ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
TypeConverters.toListInt)

maxSentLen = Param(Params._dummy(),
"maxSentLen",
"Maximum length of a sentence to be considered for training.",
typeConverter=TypeConverters.toInt)

graphFolder = Param(Params._dummy(),
"graphFolder",
"Folder path that contain external graph files.",
typeConverter=TypeConverters.toString)

def setLanguageModelClasses(self, count):
"""Sets number of classes to use during factorization of the softmax
output in the Language Model.
@@ -415,6 +429,26 @@ def setConfigProtoBytes(self, b):
"""
return self._set(configProtoBytes=b)

def setGraphFolder(self, path):
"""Sets folder path that contain external graph files.

Parameters
----------
path : str
Folder path that contains the external graph files.
"""
return self._set(graphFolder=path)

def setMaxSentLen(self, sentlen):
"""Sets the maximum length of a sentence.

Parameters
----------
sentlen : int
Maximum length of a sentence.
"""
return self._set(maxSentLen=sentlen)

def addVocabClass(self, label, vocab, userdist=3):
"""Adds a new class of words to correct, based on a vocabulary.

@@ -513,10 +547,22 @@ class ContextSpellCheckerModel(AnnotatorModel, HasEngine):
correctSymbols
Whether to correct special symbols or skip spell checking for them
compareLowcase
If true, tokens will be compared in lowercase with the vocabulary.
configProtoBytes
ConfigProto from tensorflow, serialized into byte array.

vocabFreq
Frequency of the words in the vocabulary.
idsVocab
Mapping of ids to vocabulary.
vocabIds
Mapping of vocabulary to ids.
classes
Classes the spell checker recognizes.
weights
Levenshtein weights.
useNewLines
When set to true new lines will be treated as any other character. When set to false correction is applied on paragraphs as defined by newline characters.


References
-------------
@@ -612,6 +658,31 @@ class ContextSpellCheckerModel(AnnotatorModel, HasEngine):
"ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
TypeConverters.toListInt)

vocabFreq = Param(
Params._dummy(),
"vocabFreq",
"Frequency words from the vocabulary.",
TypeConverters.identity,
)
idsVocab = Param(
Params._dummy(),
"idsVocab",
"Mapping of ids to vocabulary.",
TypeConverters.identity,
)
vocabIds = Param(
Params._dummy(),
"vocabIds",
"Mapping of vocabulary to ids.",
TypeConverters.identity,
)
classes = Param(
Params._dummy(),
"classes",
"Classes the spell checker recognizes.",
TypeConverters.identity,
)

def setWordMaxDistance(self, dist):
"""Sets maximum distance for the generated candidates for every word.

@@ -706,6 +777,46 @@ def setConfigProtoBytes(self, b):
"""
return self._set(configProtoBytes=b)

def setVocabFreq(self, value: dict):
"""Sets frequency words from the vocabulary.

Parameters
----------
value : dict
Frequency of the words in the vocabulary.
"""
return self._set(vocabFreq=value)

def setIdsVocab(self, idsVocab: dict):
"""Sets mapping of ids to vocabulary.

Parameters
----------
idsVocab : dict
Mapping of ids to vocabulary.
"""
return self._set(idsVocab=idsVocab)

def setVocabIds(self, vocabIds: dict):
"""Sets mapping of vocabulary to ids.

Parameters
----------
vocabIds : dict
Mapping of vocabulary to ids.
"""
return self._set(vocabIds=vocabIds)

def setClasses(self, value):
"""Sets classes the spell checker recognizes.

Parameters
----------
value : list
Classes the spell checker recognizes.
"""
return self._set(classes=value)

def getWordClasses(self):
"""Gets the classes of words to be corrected.

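
To round off the new spell-checker parameters, here is a minimal training sketch, assuming open-source Spark NLP is installed and a Spark session is active (the graph folder path, sentence length, and `corpus_df` are illustrative):

```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, ContextSpellCheckerApproach

document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

spell_checker = (
    ContextSpellCheckerApproach()
    .setInputCols(["token"])
    .setOutputCol("checked")
    .setGraphFolder("path/to/graphs")  # new setter: external graph files
    .setMaxSentLen(250)                # new setter: max sentence length in training
)

pipeline = Pipeline(stages=[document_assembler, tokenizer, spell_checker])
# model = pipeline.fit(corpus_df)  # corpus_df: a dataframe with a "text" column
```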