Merge pull request allenai#3 from allenai/master

Pull from allennlp master
dirkgr · Apr 15, 2019 · aaeddef · aaeddef
2 parents 5ccf8ad + 58f386c
commit aaeddef
Show file tree

Hide file tree

Showing 423 changed files with 19,540 additions and 9,394 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -20,7 +20,7 @@ A clear and concise description of what you expected to happen.
 **System (please complete the following information):**
  - OS: [e.g. OSX, Linux]
  - Python version: [if it's not 3.6.1 or later, that's probably your problem]
- - AllenNLP version: [e.g. v0.7.0, or "I installed from master"]
+ - AllenNLP version: [e.g. v0.8.3, or "I installed from master"]
  - PyTorch version: (if you installed it yourself)
 
 **Additional context**

diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
@@ -9,7 +9,7 @@ Please first search our GitHub repository for similar questions.  If you don't f
 **System (please complete the following information):**
  - OS: [e.g. OSX, Linux]
  - Python version: [e.g. 3.6.1]
- - AllenNLP version: [e.g. v0.8.0, or "I installed from master"]
+ - AllenNLP version: [e.g. v0.8.3, or "I installed from master"]
  - PyTorch version: (if you installed it yourself)
 
 **Question**

diff --git a/.pylintrc b/.pylintrc
@@ -9,7 +9,7 @@ init-hook='import sys; sys.path.append("./")'
 
 # Add files or directories to the blacklist. They should be base names, not
 # paths.
-ignore=CVS,custom_extensions
+ignore=CVS
 
 # Add files or directories matching the regex patterns to the blacklist. The
 # regex matches against base names, not paths.
@@ -182,7 +182,7 @@ expected-line-ending-format=
 [BASIC]
 
 # Good variable names which should always be accepted, separated by a comma
-good-names=i,j,k,ex,Run,_,logger
+good-names=i,j,k,ex,Run,_,logger,f1
 
 # Bad variable names which should always be refused, separated by a comma
 bad-names=foo,bar,baz,toto,tutu,tata

diff --git a/DEPRECATION.md b/DEPRECATION.md
@@ -0,0 +1,100 @@
+# Deprecation Policy Proposal
+
+As AllenNLP matures we will inevitably need to deprecate old models, interfaces and features. This document provides guidelines that attempt to minimize churn. Reducing churn helps users build next generation models with tools they've already mastered.
+
+### Terminology note
+
+We _deprecate_ some component to communicate that a preferred alternative exists. A _deprecated_ component does not necessarily have to be _removed_ from the library.
+
+### Goals
+
+1. Promote backward compatibility.
+2. Describe code-level and release process for both deprecation and removal.
+
+### Non-goals
+
+1. Eliminate all deprecation and removal.
+2. Legislate. Good judgment by AllenNLP developers is much preferred.
+
+## Decision Flow
+
+Say you've found some old code that needs some care. You have a fix in mind, maybe already a PR. Great, let's get it merged! But let's give some thought to backward compatibility along the way.
+
+1. Determine whether backward compatibility is even an issue.
+
+  * __Likely safe__
+
+    * New code
+    * Internal code _TODO: How is this determined?_
+    * Implementation change only
+
+  * __Care required__
+
+    * File/class/function names
+      * Including names passed to `Registrable.register`!
+    * Function signatures
+    * Config API for defining models, dataset readers, etc.
+      * In effect, the `__init__` (or custom `from_params`) methods for `Registrable` classes.
+    * Model internals that affect shape of saved weights
+
+2. If so, first consider how impactful the change is. Impact should be weighed against cost. A low impact change shouldn't impose much (or even any) cost on our users.
+
+  * __Low impact__
+
+    * Minor name changes
+    * "Cleaner" APIs with no functional benefit
+
+  * __Medium impact__
+
+    * Name/API that is actively confusing multiple users.
+      * This means GitHub issues, messages on user channels, etc.
+    * Useful new feature
+
+  * __High impact__
+
+    * Major bugs
+
+3. If the impact merits the cost, let's try to make the change in a backward compatible manner.
+
+  * Options include:
+    * When deprecating, say, a class, leave a shim to its replacement.
+      * E.g., an existing class implementation might be replaced by subclassing its more general replacement.
+    * A function signature can likely be changed with a keyword argument.
+    * You can decorate with `Registrable.register` multiple times.
+    * Features can be hidden behind flags.
+      * E.g., simple boolean arguments to constructors.
+    * Files can simply be forked. (for extreme cases)
+      * This could occur when extending an existing model, for instance.
+      * Copying a file, modifying it, and then de-duplicating can be simpler than parameterizing existing code to handle a new use-case in a purely backwards compatible manner.
+
+  * Likely no deprecation needs to occur in this case. However, in the spirit of providing a single preferred solution, one may still mark the old component as deprecated while _not_ removing it. Usages in AllenNLP should be removed so that our warnings are not spammy. Though silencing warnings is an option, we should "eat our own dog food" or we can hardly expect our users to migrate.
+
+4. If no backward compatible change can be made, it's advisable to first consult with other developers.
+
+  * Ideally they will propose an alternative solution or help mitigate the churn during the change.
+  * Next proceed to __Mechanics__.
+
+## Mechanics
+
+1. Mark the old component as deprecated by adding a `DeprecationWarning`.
+  * This should include whether the component will be removed.
+  * If it must be removed, describe why in an accompanying comment. Link relevant issues.
+  * [Example](https://github.com/allenai/allennlp/blob/cb9651a4c77c10cbd2d76f79b85c6453386dc229/allennlp/modules/text_field_embedders/basic_text_field_embedder.py#L141)
+  * Provide the version and date when we will remove the feature if applicable.
+    * We should keep supporting the feature until whichever of the two (version or date) comes later.
+    * The code should live for at least one full minor version and 3 months before removal.
+      * e.g., if you're committing the deprecation to master while version 0.8.4 is out, then it should live throughout version 0.9 and can first be removed in version 0.10.0.
+      * In particular, this should be at least a minor release, i.e. m.n.0.
+      * If this isn't possible, consult with other developers. You should have a compelling rationale.
+
+2. Remove any AllenNLP usages of the deprecated feature to avoid warnings.
+  * Suppressing warnings should be done rarely. [See here for instance.](https://github.com/allenai/allennlp/blob/9719b5c71207e642276fb1209ea1a4c8467e0792/allennlp/modules/token_embedders/embedding.py#L14)
+
+3. Create a GitHub issue for the actual removal and wait for the requisite removal release.
+  * Link the deprecation warning.
+  * Copy over the removal release date and version that you are targeting for easy issue triage.
+
+4. You or another developer should coordinate the removal PR such that it will go into the desired release.
+  * Add a "Breaking Changes" section to the release notes.
+
+5. Release
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.6.3-jessie
+FROM python:3.6.8-stretch
 
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
@@ -30,28 +30,25 @@ RUN apt-get update --fix-missing && apt-get install -y \
     libxrender1 \
     wget \
     libevent-dev \
-    build-essential && \
+    build-essential \
+    openjdk-8-jdk && \
     rm -rf /var/lib/apt/lists/*
 
-# Install Java.
-RUN echo "deb http://http.debian.net/debian jessie-backports main" >>/etc/apt/sources.list
-RUN apt-get update
-RUN apt-get install -y -t jessie-backports openjdk-8-jdk
-
 # Copy select files needed for installing requirements.
 # We only copy what we need here so small changes to the repository does not trigger re-installation of the requirements.
 COPY requirements.txt .
-COPY scripts/install_requirements.sh scripts/install_requirements.sh
-RUN ./scripts/install_requirements.sh
+RUN pip install -r requirements.txt
 
-COPY bin/ bin/
 COPY scripts/ scripts/
 COPY allennlp/ allennlp/
 COPY pytest.ini pytest.ini
 COPY .pylintrc .pylintrc
 COPY tutorials/ tutorials/
 COPY training_config training_config/
 COPY setup.py setup.py
+COPY README.md README.md
+
+RUN pip install --editable .
 
 # Compile EVALB - required for parsing evaluation.
 # EVALB produces scary looking c-level output which we don't

diff --git a/Dockerfile.pip b/Dockerfile.pip
@@ -1,7 +1,7 @@
 # This Dockerfile creates an environment suitable for downstream usage of AllenNLP.
 # It creates an environment that includes a pip installation of allennlp.
 
-FROM python:3.6.3-jessie
+FROM python:3.6.8-stretch
 
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,7 +1,6 @@
 include LICENSE
 include README.md
 include requirements.txt
-include bin/allennlp
 recursive-include allennlp *
 recursive-include scripts *
 recursive-include training_config *.json

diff --git a/README.md b/README.md
@@ -109,10 +109,11 @@ Once you have [installed Docker](https://docs.docker.com/engine/installation/)
 just run the following command to get an environment that will run on either the cpu or gpu.
 
    ```bash
-   docker run -it -p 8000:8000 --rm allennlp/allennlp:v0.8.0
+   mkdir -p $HOME/.allennlp/
+   docker run --rm -v $HOME/.allennlp:/root/.allennlp allennlp/allennlp:v0.8.3
    ```
 
-You can test the Docker environment with `docker run -it -p 8000:8000 --rm allennlp/allennlp:v0.8.0 test-install`.
+You can test the Docker environment with `docker run --rm -v $HOME/.allennlp:/root/.allennlp allennlp/allennlp:v0.8.3 test-install`.
 
 ### Installing from source
 
@@ -122,33 +123,24 @@ You can also install AllenNLP by cloning our git repository:
   git clone https://github.com/allenai/allennlp.git
   ```
 
-Create a Python 3.6 virtual environment, and install the necessary requirements by running:
-
-  ```bash
-  INSTALL_TEST_REQUIREMENTS=true scripts/install_requirements.sh
-  ```
-
-Changing the flag to false if you don't want to be able to run
-tests. Once the requirements have been installed, run:
+Create a Python 3.6 virtual environment, and install AllenNLP in `editable` mode by running:
 
   ```bash
   pip install --editable .
   ```
 
-To install the AllenNLP library in `editable` mode into your
-environment.  This will make `allennlp` available on your
-system but it will use the sources from the local clone you
-made of the source repository.
+This will make `allennlp` available on your system but it will use the sources from the local clone
+you made of the source repository.
 
-You can test your installation with `bin/allennlp test-install`.
+You can test your installation with `allennlp test-install`.
 The full development environment also requires the JVM and `perl`,
 which must be installed separately.  `./scripts/verify.py` will run
 the full suite of tests used by our continuous build environment.
 
 ## Running AllenNLP
 
 Once you've installed AllenNLP, you can run the command-line interface either
-with the `allennlp` command (if you installed via `pip`) or `bin/allennlp` (if you installed via source).
+with the `allennlp` command (whether you installed via `pip` or from source).
 
 ```bash
 $ allennlp
@@ -160,17 +152,18 @@ optional arguments:
 
 Commands:
 
-    configure   Generate configuration stubs.
-    train       Train a model
-    evaluate    Evaluate the specified model + dataset
+    configure   Run the configuration wizard.
+    train       Train a model.
+    evaluate    Evaluate the specified model + dataset.
     predict     Use a trained model to make predictions.
-    make-vocab  Create a vocabulary
+    make-vocab  Create a vocabulary.
     elmo        Create word vectors using a pretrained ELMo model.
-    fine-tune   Continue training a model on a new dataset
+    fine-tune   Continue training a model on a new dataset.
     dry-run     Create a vocabulary, compute dataset statistics and other
                 training utilities.
     test-install
                 Run the unit tests.
+    find-lr     Find a learning rate range.
 ```
 
 ## Docker images
@@ -204,6 +197,16 @@ You can run the image with `docker run --rm -it allennlp/allennlp:latest`.  The
 
 You can test your installation by running  `allennlp test-install`.
 
+## Issues
+
+Everyone is welcome to file issues with feature requests, bug reports, or general questions.  As a small team with our own internal goals, we may ask for contributions if a prompt fix doesn't fit into our roadmap.  We allow users a two-week window to follow up on questions, after which we will close issues.  They can be re-opened if there is further discussion.
+
+## Contributions
+
+The AllenNLP team at AI2 (@allenai) welcomes contributions from the greater AllenNLP community, and, if you would like to get a change into the library, this is likely the fastest approach.  If you would like to contribute a larger feature, we recommend first creating an issue with a proposed design for discussion.  This will prevent you from spending significant time on an implementation which has a technical limitation someone could have pointed out early on.  Small contributions can be made directly in a pull request.
+
+Pull requests (PRs) must have one approving review and no requested changes before they are merged.  As AllenNLP is primarily driven by AI2 (@allenai) we reserve the right to reject or revert contributions that we don't think are good additions.
+
 ## Citing
 
 If you use AllenNLP in your research, please cite [AllenNLP: A Deep Semantic Natural Language Processing Platform](https://www.semanticscholar.org/paper/AllenNLP%3A-A-Deep-Semantic-Natural-Language-Platform-Gardner-Grus/a5502187140cdd98d76ae711973dbcdaf1fef46d).

diff --git a/allennlp/commands/__init__.py b/allennlp/commands/__init__.py
@@ -2,6 +2,8 @@
 import argparse
 import logging
 
+from overrides import overrides
+
 from allennlp import __version__
 from allennlp.commands.configure import Configure
 from allennlp.commands.elmo import Elmo
@@ -19,6 +21,33 @@
 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
 
 
+class ArgumentParserWithDefaults(argparse.ArgumentParser):
+    """
+    Custom argument parser that will display the default value for an argument
+    in the help message.
+    """
+
+    _action_defaults_to_ignore = {"help", "store_true", "store_false", "store_const"}
+
+    @staticmethod
+    def _is_empty_default(default):
+        if default is None:
+            return True
+        if isinstance(default, (str, list, tuple, set)):
+            return not bool(default)
+        return False
+
+    @overrides
+    def add_argument(self, *args, **kwargs):
+        # pylint: disable=arguments-differ
+        # Add default value to the help message when the default is meaningful.
+        default = kwargs.get("default")
+        if kwargs.get("action") not in self._action_defaults_to_ignore and not self._is_empty_default(default):
+            description = kwargs.get("help") or ""
+            kwargs["help"] = f"{description} (default = {default})"
+        super().add_argument(*args, **kwargs)
+
+
 def main(prog: str = None,
          subcommand_overrides: Dict[str, Subcommand] = {}) -> None:
     """
@@ -27,7 +56,7 @@ def main(prog: str = None,
     work for them, unless you use the ``--include-package`` flag.
     """
     # pylint: disable=dangerous-default-value
-    parser = argparse.ArgumentParser(description="Run AllenNLP", usage='%(prog)s', prog=prog)
+    parser = ArgumentParserWithDefaults(description="Run AllenNLP", usage='%(prog)s', prog=prog)
     parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
 
     subparsers = parser.add_subparsers(title='Commands', metavar='')

diff --git a/allennlp/commands/dry_run.py b/allennlp/commands/dry_run.py
@@ -31,14 +31,14 @@
 import os
 import re
 
-from allennlp.commands.train import datasets_from_params
 from allennlp.commands.subcommand import Subcommand
 from allennlp.common.checks import ConfigurationError
 from allennlp.common.params import Params
 from allennlp.common.util import prepare_environment, get_frozen_and_tunable_parameter_names
 from allennlp.data import Vocabulary
 from allennlp.data.dataset import Batch
 from allennlp.models import Model
+from allennlp.training.util import datasets_from_params
 
 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
 

diff --git a/allennlp/commands/elmo.py b/allennlp/commands/elmo.py
@@ -97,7 +97,8 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar
         subparser = parser.add_parser(
                 name, description=description, help='Create word vectors using a pretrained ELMo model.')
 
-        subparser.add_argument('input_file', type=argparse.FileType('r'), help='The path to the input file.')
+        subparser.add_argument('input_file', type=argparse.FileType('r', encoding='utf-8'),
+                               help='The path to the input file.')
         subparser.add_argument('output_file', type=str, help='The path to the output file.')
 
         group = subparser.add_mutually_exclusive_group(required=True)