Commit
Merge remote-tracking branch 'origin/master'
Jose J. Martinez committed Feb 9, 2023
2 parents 294c549 + b103fa6 commit 6cba0fe
Showing 1,857 changed files with 198,661 additions and 84,518 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -1,2 +1,3 @@
python/example/* linguist-vendored
*.ipynb linguist-vendored
examples/* linguist-vendored
47 changes: 0 additions & 47 deletions .github/ISSUE_TEMPLATE/bug_report.md

This file was deleted.

105 changes: 105 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,105 @@
name: Bug report
description: File a bug/issue to help us improve Spark NLP. Thank you for contributing!
labels: [bug]
assignees: "maziyarpanahi"

body:
- type: checkboxes
attributes:
label: Is there an existing issue for this?
description: Please search to see if an issue already exists for the bug you encountered.
options:
- label: I have searched the existing issues and did not find a match.
required: true
- type: textarea
attributes:
label: Who can help?
description: |
Your issue will be processed faster if you can tag the right person for it.
If you know how to use `git blame`, you can also tag that person directly.
Otherwise, we will find the right person to help you.
- type: textarea
attributes:
label: What are you working on?
description: |
A brief description of the context of the issue. Is it an official example?
Is it a published or custom task/dataset (GLUE, SQuAD, etc.)?
validations:
required: true
- type: textarea
attributes:
label: Current Behavior
description: A concise description of what you're experiencing.
validations:
required: true
- type: textarea
attributes:
label: Expected Behavior
description: A concise description of what you expected to happen.
validations:
required: true
- type: textarea
attributes:
label: Steps To Reproduce
description: |
Please provide information on how to reproduce the issue. This could be a link to a
Google Colab, Databricks, or other notebook. Alternatively, it can be a pipeline
code snippet formatted in Markdown.
If you have error logs or stack traces, attach them here as well.
placeholder: |
A link to an end-to-end Colab/Jupyter notebook such as https://colab.research.google.com/...
or a full pipeline code snippet:
```python
import sparknlp
...
```
validations:
required: true
- type: markdown
attributes:
value: |
## Environment
Please provide information about your environment. The more detail you can give, the faster we can resolve the issue.
- type: textarea
attributes:
label: Spark NLP version and Apache Spark
description: Result of `sparknlp.version()` and `spark.version`
placeholder: |
import sparknlp
sparknlp.version()
spark.version
validations:
required: true
- type: dropdown
attributes:
label: Type of Spark Application
multiple: true
options: ["spark-shell", "spark-submit", "Scala Application", "Python Application", "Java Application"]
- type: input
attributes:
label: Java Version
description: Result of `java -version`
- type: input
attributes:
label: Java Home Directory
description: Result of `echo $JAVA_HOME` (or the `JAVA_HOME` environment variable on Windows)
- type: input
attributes:
label: Setup and installation
description: How you set up Spark NLP, e.g. PyPI, Conda, Maven, sbt, etc.
- type: input
attributes:
label: Operating System and Version
- type: input
attributes:
label: Link to your project (if available)
- type: textarea
attributes:
label: Additional Information
description: |
Links? References? Anything that will give us more context about the issue you are encountering.
Tip: You can attach files by clicking this area to highlight it and then dragging them in.
validations:
required: false
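
As a rough illustration of the environment details this form asks for, a reporter could collect them in a single Python session like this (a minimal sketch, assuming Spark NLP and PySpark are already installed; nothing here is part of the template itself):

```python
import os

import sparknlp

# Start a Spark session with Spark NLP on the classpath
spark = sparknlp.start()

# Values requested by the bug report form above
print("Spark NLP version:", sparknlp.version())
print("Apache Spark version:", spark.version)
print("JAVA_HOME:", os.environ.get("JAVA_HOME", "not set"))
```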
9 changes: 9 additions & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,9 @@
blank_issues_enabled: true

contact_links:
- name: Converting models from other libraries? Visit the Discussion to see which are compatible.
url: https://github.com/JohnSnowLabs/spark-nlp/discussions/5669
about: Discussion about importing models from other libraries
- name: Want to contribute a model? Visit the NLP Models Hub to upload your model.
url: https://nlp.johnsnowlabs.com/models
about: A place for sharing and discovering Spark NLP models and pipelines
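
For context on the model-conversion discussion linked above: models exported from other libraries are typically brought into Spark NLP through an annotator's `loadSavedModel` method. A hedged sketch (the export path and the choice of `BertEmbeddings` are illustrative only, not taken from this commit):

```python
import sparknlp
from sparknlp.annotator import BertEmbeddings

spark = sparknlp.start()

# Illustrative path to a TensorFlow SavedModel exported from another library
bert = (
    BertEmbeddings.loadSavedModel("exported_models/bert_base_cased", spark)
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

# Persist it so it can be reused like any other Spark NLP model
bert.write().overwrite().save("models/bert_base_cased_spark_nlp")
```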
14 changes: 0 additions & 14 deletions .github/ISSUE_TEMPLATE/doc_improvement.md

This file was deleted.

14 changes: 14 additions & 0 deletions .github/ISSUE_TEMPLATE/doc_improvement.yml
@@ -0,0 +1,14 @@
name: Documentation Improvement
description: File an issue to suggest edits to the documentation.
labels: [documentation]
assignees: "DevinTDHa"

body:
- type: textarea
attributes:
label: Link to the documentation pages (if available)
- type: textarea
attributes:
label: How could the documentation be improved?
validations:
required: true
20 changes: 0 additions & 20 deletions .github/ISSUE_TEMPLATE/feature_request.md

This file was deleted.

34 changes: 34 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,34 @@
name: "Feature Request"
description: Suggest a new Spark NLP feature
labels: [ "Feature request" ]
assignees: "maziyarpanahi"

body:
- type: textarea
id: description
validations:
required: true
attributes:
label: Description
description: |
Is your feature request related to a problem? Why do you want this feature?
Please provide a clear and concise description of what the problem is. Also link GitHub issues when applicable.
placeholder: |
I have this problem I want to solve in Spark NLP, but ...
Implementing this new feature would help ...
- type: textarea
id: solution
validations:
required: true
attributes:
label: Preferred Solution
description: |
A clear and concise description of what you want to happen.
- type: textarea
id: additional-context
validations:
required: true
attributes:
label: Additional Context
description: |
Add any other context or screenshots about the feature request here.
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -54,7 +54,7 @@ jobs:
- name: Install Python packages (Python 3.7)
run: |
python -m pip install --upgrade pip
- pip install pyspark==3.3.0 numpy pytest
+ pip install pyspark==3.3.1 numpy pytest
- name: Build Spark NLP on Apache Spark 3.3.0
run: |
brew install sbt
27 changes: 27 additions & 0 deletions CHANGELOG
@@ -1,3 +1,30 @@
========
4.3.0
========
----------------
New Features
----------------
* Implement HubertForCTC annotator for automatic speech recognition
* Implement SwinForImageClassification annotator for Image Classification
* Introducing CamemBERT for Question Answering annotator
* Implement ZeroShotNerModel annotator for zero-shot NER based on the RoBERTa architecture
* Implement Date2Chunk annotator
* Enable `params` argument in the `sparknlp.start()` function (see the sketch after this changelog excerpt)
* Allow reading doc_id from CoNLL file datasets

----------------
Bug Fixes & Enhancements
----------------
* Relocating all notebooks back to examples directory
* Improve downloading/loading models & pipelines from AWS and GCP. When the `cache_pretrained` directory is set to an AWS or GCP location, existing models/pipelines are no longer copied again
* Improve GitHub templates for bug reports, documentation, and feature requests
* Add documentation to ResourceDownloader
* Refactor `ml` package to allow other DL engines in the future
* Apache Spark 3.3.1 is now the base version of Spark NLP
* Spark NLP supports M2 in addition to M1. Therefore, we are renaming `spark-nlp-m1` to `spark-nlp-silicon` on Maven
* Fix calculating delimiter id in CamemBERT
* Fix loadSavedModel for private buckets

========
4.2.8
========
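
To illustrate the `params` argument mentioned in the 4.3.0 New Features above, starting a session with extra Spark configuration might look roughly like this (a hedged sketch; the specific configuration keys and values are illustrative, not recommendations from this release):

```python
import sparknlp

# Pass additional Spark configuration through the new `params` argument
# (keys/values below are examples only)
spark = sparknlp.start(params={
    "spark.driver.memory": "8G",
    "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
})

print(sparknlp.version(), spark.version)
```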
