This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

add travis stuff #3

Merged
merged 16 commits on Jun 28, 2017
2 changes: 1 addition & 1 deletion .pylintrc
@@ -65,7 +65,7 @@ confidence=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use"--disable=all --enable=classes
# --disable=W"
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,missing-docstring,too-many-arguments,too-many-locals,too-many-statements,too-many-branches,too-many-nested-blocks,too-many-instance-attributes,fixme,too-few-public-methods
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,missing-docstring,too-many-arguments,too-many-locals,too-many-statements,too-many-branches,too-many-nested-blocks,too-many-instance-attributes,fixme,too-few-public-methods,no-else-return


[REPORTS]
26 changes: 26 additions & 0 deletions .travis.yml
@@ -0,0 +1,26 @@
# use new container-based travis workers
sudo: false
dist: trusty

language: python

cache:
directories:
- /home/travis/nltk_data
- /home/travis/download
- /home/travis/miniconda3
- /home/travis/.cache/pip

env:
matrix:
# run tests, submit coverage info
- COVERAGE="true"
# pylint checks skip the tests
- RUN_PYLINT="true" SKIP_TESTS="true"

notifications:
email: false

install: source build_tools/travis/install.sh
script: bash build_tools/travis/test_script.sh
after_success: source build_tools/travis/after_success.sh
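
Note: the install, test, and coverage-upload steps above delegate to scripts under `build_tools/travis/` that are not part of this diff. As a rough, hypothetical illustration of how the `COVERAGE` / `RUN_PYLINT` / `SKIP_TESTS` matrix variables could drive the test step, here is a Python-flavoured sketch; the real `test_script.sh` is a shell script and may differ:

```python
# Hypothetical sketch only: the real build_tools/travis/test_script.sh is not in this diff.
import os
import subprocess
import sys

def main() -> int:
    run_pylint = os.environ.get("RUN_PYLINT") == "true"
    skip_tests = os.environ.get("SKIP_TESTS") == "true"
    coverage = os.environ.get("COVERAGE") == "true"

    if run_pylint:
        # Lint the package with the repository's .pylintrc (pylint checks skip the tests).
        return subprocess.call(["pylint", "--rcfile=.pylintrc", "allennlp"])
    if not skip_tests:
        cmd = ["pytest"]
        if coverage:
            # Assumes pytest-cov is installed; coverage is uploaded to codecov afterwards.
            cmd += ["--cov=allennlp"]
        return subprocess.call(cmd)
    return 0

if __name__ == "__main__":
    sys.exit(main())
```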
2 changes: 2 additions & 0 deletions README.md
@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/allenai/allennlp.svg?branch=master)](https://travis-ci.org/allenai/allennlp)
[![codecov](https://codecov.io/gh/allenai/allennlp/branch/master/graph/badge.svg)](https://codecov.io/gh/allenai/allennlp)
# AllenNLP

A [Apache 2.0](https://github.com/allenai/allennlp/blob/master/LICENSE) natural language processing toolkit using state-of-the-art deep learning models.
17 changes: 8 additions & 9 deletions allennlp/common/params.py
@@ -52,12 +52,12 @@ class Params(MutableMapping):
# and passing no value to the default parameter of "pop".
DEFAULT = object()

def __init__(self, params: Dict[str, Any], history: str=""):
def __init__(self, params: Dict[str, Any], history: str = ""):
self.params = params
self.history = history

@overrides
def pop(self, key: str, default: Any=DEFAULT):
def pop(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.pop(key), along with checking for
returned dictionaries, replacing them with Param objects with an updated history.
@@ -76,7 +76,7 @@ def pop(self, key: str, default: Any=DEFAULT):
return self.__check_is_dict(key, value)

@overrides
def get(self, key: str, default: Any=DEFAULT):
def get(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.get(key) but also checks for returned
dicts and returns a Params object in their place with an updated history.
@@ -90,7 +90,7 @@ def get(self, key: str, default: Any=DEFAULT):
value = self.params.get(key, default)
return self.__check_is_dict(key, value)

def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool=False):
def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool = False):
"""
Gets the value of ``key`` in the ``params`` dictionary, ensuring that the value is one of
the given choices. Note that this `pops` the key from params, modifying the dictionary,
@@ -156,7 +156,7 @@ def assert_empty(self, class_name: str):
was one. ``class_name`` should be the name of the `calling` class, the one that got extra
parameters (if there are any).
"""
if len(self.params) != 0:
if self.params:
raise ConfigurationError("Extra parameters passed to {}: {}".format(class_name, self.params))

def __getitem__(self, key):
@@ -181,15 +181,14 @@ def __check_is_dict(self, new_history, value):
if isinstance(value, dict):
new_history = self.history + new_history + "."
return Params(value, new_history)
else:
return value
return value


def pop_choice(params: Dict[str, Any],
key: str,
choices: List[Any],
default_to_first_choice: bool=False,
history: str="?.") -> Any:
default_to_first_choice: bool = False,
history: str = "?.") -> Any:
"""
Performs the same function as :func:`Params.pop_choice`, but is required in order to deal with
places that the Params object is not welcome, such as inside Keras layers. See the docstring
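
Taken together with the docstrings above, a minimal usage sketch of `Params` (the import path is assumed from the file location `allennlp/common/params.py`):

```python
# Minimal sketch of the behaviour described in the docstrings above; the import
# path is an assumption based on the file location shown in this diff.
from allennlp.common.params import Params

params = Params({"model": {"type": "snli", "num_layers": 2}}, history="experiment.")

model_params = params.pop("model")        # nested dicts come back as Params objects
assert isinstance(model_params, Params)   # with history "experiment.model."

model_type = model_params.pop_choice("type", choices=["snli", "squad"])
num_layers = model_params.pop("num_layers")

# assert_empty raises ConfigurationError if any unexpected keys are left over.
model_params.assert_empty("MyModel")
```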
2 changes: 1 addition & 1 deletion allennlp/common/tee_logger.py
@@ -24,7 +24,7 @@ def write(self, message):
# correctly, so we'll just make sure that each batch shows up on its one line.
if '\x08' in message:
message = message.replace('\x08', '')
if len(message) == 0 or message[-1] != '\n':
if not message or message[-1] != '\n':
message += '\n'
self.log.write(message)

11 changes: 5 additions & 6 deletions allennlp/common/util.py
@@ -20,8 +20,8 @@ def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[

def pad_sequence_to_length(sequence: List,
desired_length: int,
default_value: Callable[[], Any]=lambda: 0,
padding_on_right: bool=True) -> List:
default_value: Callable[[], Any] = lambda: 0,
padding_on_right: bool = True) -> List:
"""
Take a list of objects and pads it to the desired length, returning the padded list. The
original list is not modified.
@@ -73,8 +73,8 @@ def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -


def clean_layer_name(input_name: str,
strip_right_of_last_backslash: bool=True,
strip_numerics_after_underscores: bool=True):
strip_right_of_last_backslash: bool = True,
strip_numerics_after_underscores: bool = True):
"""
There exist cases when layer names need to be concatenated in order to create new, unique
layer names. However, the indices added to layer names designating the ith output of calling
@@ -114,5 +114,4 @@ def namespace_match(pattern: str, namespace: str):
return True
elif pattern == namespace:
return True
else:
return False
return False
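
For reference, a short sketch of the two helpers touched above, based on their docstrings (import path assumed from the file location `allennlp/common/util.py`):

```python
# Sketch based on the docstrings above; the import path is an assumption.
from allennlp.common.util import namespace_match, pad_sequence_to_length

# Pads a list to the desired length without modifying the original.
assert pad_sequence_to_length([1, 2, 3], 5) == [1, 2, 3, 0, 0]
assert pad_sequence_to_length([1, 2, 3], 5, padding_on_right=False) == [0, 0, 1, 2, 3]

# namespace_match returns True on an exact match; the leading-'*' wildcard case
# sits in the first branch of the function, which is outside this hunk.
assert namespace_match("tokens", "tokens")
assert not namespace_match("tokens", "characters")
```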
6 changes: 3 additions & 3 deletions allennlp/data/data_generator.py
@@ -103,7 +103,7 @@ def __init__(self, text_trainer, params: Params):
#: this data.
self.last_num_batches = None

def create_generator(self, dataset: Dataset, batch_size: int=None):
def create_generator(self, dataset: Dataset, batch_size: int = None):
"""
Main external API call: converts an ``IndexedDataset`` into a data generator suitable for
use with Keras' ``fit_generator`` and related methods.
@@ -177,8 +177,8 @@ def __adaptive_grouping(self, instances: List[Instance]):

@staticmethod
def sort_dataset_by_padding(dataset: Dataset,
sorting_keys: List[Tuple[str, str]],
padding_noise: float=0.0) -> List[Instance]:
sorting_keys: List[Tuple[str, str]], # pylint: disable=invalid-sequence-index
padding_noise: float = 0.0) -> List[Instance]:
"""
Sorts the ``Instances`` in this ``Dataset`` by their padding lengths, using the keys in
``sorting_keys`` (in the order in which they are provided). ``sorting_keys`` is a list of
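
`sort_dataset_by_padding` orders instances by their padding lengths, with `padding_noise` jittering the lengths so batches are not always grouped identically. A standalone sketch of the idea, using plain dicts instead of the real `Dataset`/`Instance` classes:

```python
# Standalone illustration of sorting by (noisy) padding lengths; this mimics the
# idea behind sort_dataset_by_padding but does not use the real allennlp classes.
import random
from typing import Dict, List, Tuple

def add_noise(lengths: Dict[str, int], noise_param: float) -> Dict[str, float]:
    # Scale each length by a factor drawn uniformly from [1 - noise, 1 + noise].
    return {key: value * (1.0 + noise_param * (2 * random.random() - 1.0))
            for key, value in lengths.items()}

def sort_by_padding(instances: List[Dict[str, Dict[str, int]]],
                    sorting_keys: List[Tuple[str, str]],
                    padding_noise: float = 0.0) -> List[Dict[str, Dict[str, int]]]:
    def sort_key(instance):
        noisy = {field: add_noise(keys, padding_noise) for field, keys in instance.items()}
        return tuple(noisy[field][key] for field, key in sorting_keys)
    return sorted(instances, key=sort_key)

instances = [{"question": {"num_tokens": 12}},
             {"question": {"num_tokens": 5}},
             {"question": {"num_tokens": 9}}]
ordered = sort_by_padding(instances, [("question", "num_tokens")], padding_noise=0.1)
```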
4 changes: 2 additions & 2 deletions allennlp/data/dataset.py
@@ -75,8 +75,8 @@ def get_padding_lengths(self) -> Dict[str, Dict[str, int]]:
return {**padding_lengths}

def as_arrays(self,
padding_lengths: Dict[str, Dict[str, int]]=None,
verbose: bool=True) -> Dict[str, List[numpy.array]]:
padding_lengths: Dict[str, Dict[str, int]] = None,
verbose: bool = True) -> Dict[str, List[numpy.array]]:
"""
This method converts this ``Dataset`` into a set of numpy arrays that can be passed through
a model. In order for the numpy arrays to be valid arrays, all ``Instances`` in this
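
`as_arrays` pads every instance to common lengths and returns numpy arrays keyed by field name. A simplified, standalone sketch of that padding step (not the real `Dataset` API):

```python
# Simplified standalone sketch of the padding step behind Dataset.as_arrays;
# real instances contain Fields rather than raw token-id lists.
from typing import Dict, List
import numpy

def pad_and_stack(token_ids: List[List[int]],
                  padding_lengths: Dict[str, int] = None) -> numpy.ndarray:
    if padding_lengths is None:
        padding_lengths = {"num_tokens": max(len(ids) for ids in token_ids)}
    num_tokens = padding_lengths["num_tokens"]
    padded = [ids[:num_tokens] + [0] * (num_tokens - len(ids)) for ids in token_ids]
    return numpy.asarray(padded, dtype=numpy.int64)

batch = pad_and_stack([[2, 5, 7], [4], [9, 9, 9, 3]])
# batch.shape == (3, 4); shorter sequences are right-padded with zeros.
```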
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/language_modeling.py
@@ -37,9 +37,9 @@ class LanguageModelingReader(DatasetReader):
"""
def __init__(self,
filename: str,
tokens_per_instance: int=None,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokens_per_instance: int = None,
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._filename = filename
self._tokens_per_instance = tokens_per_instance
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/dataset_readers/snli.py
@@ -29,8 +29,8 @@ class SnliReader(DatasetReader):
"""
def __init__(self,
snli_filename: str,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._snli_filename = snli_filename
self._tokenizer = tokenizer
if token_indexers is None:
10 changes: 6 additions & 4 deletions allennlp/data/dataset_readers/squad.py
@@ -49,9 +49,9 @@ class SquadSentenceSelectionReader(DatasetReader):
"""
def __init__(self,
squad_filename: str,
negative_sentence_selection: str="paragraph",
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
negative_sentence_selection: str = "paragraph",
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._squad_filename = squad_filename
self._negative_sentence_selection_methods = negative_sentence_selection.split(",")
self._tokenizer = tokenizer
@@ -73,7 +73,9 @@ def __init__(self,
# Maps question indices to question strings
self._id_to_question = {}

def _get_sentence_choices(self, question_id: int, answer_id: int) -> Tuple[List[str], int]:
def _get_sentence_choices(self,
question_id: int,
answer_id: int) -> Tuple[List[str], int]: # pylint: disable=invalid-sequence-index
# Because sentences and questions have different indices, we need this to hold tuples of
# ("sentence", id) or ("question", id), instead of just single ids.
negative_sentences = set()
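
The comment in `_get_sentence_choices` motivates tagging ids with their type, since a sentence and a question can share the same integer id. A tiny illustration of that bookkeeping:

```python
# Tiny illustration of the tuple-tagged ids described in the comment above.
negative_sentences = set()
negative_sentences.add(("sentence", 3))
negative_sentences.add(("question", 3))  # same integer id, but no collision
assert len(negative_sentences) == 2
```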
4 changes: 2 additions & 2 deletions allennlp/data/fields/label_field.py
@@ -37,8 +37,8 @@ class LabelField(Field):
"""
def __init__(self,
label: Union[str, int],
label_namespace: str='labels',
num_labels: int=None):
label_namespace: str = 'labels',
num_labels: int = None):
self._label = label
self._label_namespace = label_namespace
if num_labels is None:
2 changes: 1 addition & 1 deletion allennlp/data/fields/tag_field.py
@@ -35,7 +35,7 @@ class TagField(Field):
integers for you, and this parameter tells the ``Vocabulary`` object which mapping from
strings to integers to use (so that "O" as a tag doesn't get the same id as "O" as a word).
"""
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str='tags'):
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str = 'tags'):
self._tags = tags
self._sequence_field = sequence_field
self._tag_namespace = tag_namespace
2 changes: 1 addition & 1 deletion allennlp/data/token_indexers/single_id_token_indexer.py
@@ -20,7 +20,7 @@ class SingleIdTokenIndexer(TokenIndexer):
If ``True``, we will call ``token.lower()`` before getting an index for the token from the
vocabulary.
"""
def __init__(self, token_namespace: str='tokens', lowercase_tokens: bool=False):
def __init__(self, token_namespace: str = 'tokens', lowercase_tokens: bool = False):
self.token_namespace = token_namespace
self.lowercase_tokens = lowercase_tokens

4 changes: 2 additions & 2 deletions allennlp/data/token_indexers/token_characters_indexer.py
@@ -26,8 +26,8 @@ class TokenCharactersIndexer(TokenIndexer):
retains casing.
"""
def __init__(self,
character_namespace: str='token_characters',
character_tokenizer: CharacterTokenizer=CharacterTokenizer()):
character_namespace: str = 'token_characters',
character_tokenizer: CharacterTokenizer = CharacterTokenizer()):
self.character_namespace = character_namespace
self.character_tokenizer = character_tokenizer

2 changes: 1 addition & 1 deletion allennlp/data/tokenizers/character_tokenizer.py
@@ -25,7 +25,7 @@ class CharacterTokenizer(Tokenizer):
operation. You probably do not want to do this, as character vocabularies are generally
not very large to begin with, but it's an option if you really want it.
"""
def __init__(self, byte_encoding: str=None, lowercase_characters: bool=False):
def __init__(self, byte_encoding: str = None, lowercase_characters: bool = False):
self.byte_encoding = byte_encoding
self.lowercase_characters = lowercase_characters

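
A standalone sketch of the two constructor options shown above, plain character splitting versus byte encoding (not the real `CharacterTokenizer`):

```python
# Standalone sketch of the options above; not the real CharacterTokenizer.
from typing import List, Union

def tokenize_characters(text: str,
                        byte_encoding: str = None,
                        lowercase_characters: bool = False) -> List[Union[str, int]]:
    if lowercase_characters:
        text = text.lower()
    if byte_encoding is not None:
        # Each byte becomes a token, which caps the vocabulary at 256 entries.
        return list(text.encode(byte_encoding))
    return list(text)

tokenize_characters("AllenNLP")                      # ['A', 'l', 'l', 'e', 'n', 'N', 'L', 'P']
tokenize_characters("café", byte_encoding="utf-8")   # [99, 97, 102, 195, 169]
```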
6 changes: 3 additions & 3 deletions allennlp/data/tokenizers/word_tokenizer.py
@@ -30,9 +30,9 @@ class WordTokenizer(Tokenizer):
The :class:`WordStemmer` to use. Default is no stemming.
"""
def __init__(self,
word_splitter: WordSplitter=SimpleWordSplitter(),
word_filter: WordFilter=PassThroughWordFilter(),
word_stemmer: WordStemmer=PassThroughWordStemmer()):
word_splitter: WordSplitter = SimpleWordSplitter(),
word_filter: WordFilter = PassThroughWordFilter(),
word_stemmer: WordStemmer = PassThroughWordStemmer()):
self.word_splitter = word_splitter
self.word_filter = word_filter
self.word_stemmer = word_stemmer
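
`WordTokenizer` chains a splitter, a filter, and a stemmer, with pass-through defaults for the last two. A minimal standalone sketch of that pipeline (the real splitter/filter/stemmer classes are not shown in this diff):

```python
# Minimal standalone sketch of the split -> filter -> stem pipeline; the real
# WordSplitter/WordFilter/WordStemmer classes are not part of this diff.
from typing import Callable, List

def word_tokenize(text: str,
                  splitter: Callable[[str], List[str]] = str.split,
                  word_filter: Callable[[List[str]], List[str]] = lambda words: words,
                  stemmer: Callable[[str], str] = lambda word: word) -> List[str]:
    words = splitter(text)
    words = word_filter(words)
    return [stemmer(word) for word in words]

word_tokenize("this is a sentence .")   # ['this', 'is', 'a', 'sentence', '.']
```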
28 changes: 14 additions & 14 deletions allennlp/data/vocabulary.py
@@ -1,8 +1,8 @@
from collections import defaultdict
from typing import Any, Callable, Dict, List, Union

import codecs
import logging

import tqdm

from ..common.util import namespace_match
@@ -123,10 +123,10 @@ class Vocabulary:
label fields in this code), you don't have to specify anything here.
"""
def __init__(self,
counter: Dict[str, Dict[str, int]]=None,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None):
counter: Dict[str, Dict[str, int]] = None,
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None):
self._padding_token = "@@PADDING@@"
self._oov_token = "@@UNKOWN@@"
if non_padded_namespaces is None:
@@ -150,7 +150,7 @@ def __init__(self,
if count >= min_count:
self.add_token_to_namespace(token, namespace)

def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
def set_from_file(self, filename: str, oov_token: str, namespace: str = "tokens"):
"""
If you already have a vocabulary file for a trained model somewhere, and you really want to
use that vocabulary file instead of just setting the vocabulary from a dataset, for
@@ -182,9 +182,9 @@ def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
@classmethod
def from_dataset(cls,
dataset,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None) -> 'Vocabulary':
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None) -> 'Vocabulary':
"""
Constructs a vocabulary given a :class:`.Dataset` and some parameters. We count all of the
vocabulary items in the dataset, then pass those counts, and the other parameters, to
@@ -200,7 +200,7 @@ def from_dataset(cls,
max_vocab_size=max_vocab_size,
non_padded_namespaces=non_padded_namespaces)

def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
def add_token_to_namespace(self, token: str, namespace: str = 'tokens') -> int:
"""
Adds ``token`` to the index, if it is not already present. Either way, we return the index of
the token.
@@ -213,17 +213,17 @@ def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
else:
return self._token_to_index[namespace][token]

def get_index_to_token_vocabulary(self, namespace: str='tokens') -> Dict[int, str]:
def get_index_to_token_vocabulary(self, namespace: str = 'tokens') -> Dict[int, str]:
return self._index_to_token[namespace]

def get_token_index(self, token: str, namespace: str='tokens') -> int:
def get_token_index(self, token: str, namespace: str = 'tokens') -> int:
if token in self._token_to_index[namespace]:
return self._token_to_index[namespace][token]
else:
return self._token_to_index[namespace][self._oov_token]

def get_token_from_index(self, index: int, namespace: str='tokens') -> str:
def get_token_from_index(self, index: int, namespace: str = 'tokens') -> str:
return self._index_to_token[namespace][index]

def get_vocab_size(self, namespace: str='tokens') -> int:
def get_vocab_size(self, namespace: str = 'tokens') -> int:
return len(self._token_to_index[namespace])
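
The hunks above show `get_token_index` falling back to the OOV token for unseen words. A standalone sketch of that namespaced mapping (not the real `Vocabulary` class):

```python
# Standalone sketch of the namespaced token <-> index mapping with OOV fallback
# shown above; this is not the real Vocabulary class.
from collections import defaultdict
from typing import Dict

OOV_TOKEN = "@@UNKOWN@@"  # spelled as in the diff above

class TinyVocabulary:
    def __init__(self):
        self._token_to_index: Dict[str, Dict[str, int]] = defaultdict(dict)
        self._index_to_token: Dict[str, Dict[int, str]] = defaultdict(dict)
        # The OOV token is always present so lookups can fall back to it.
        self.add_token_to_namespace(OOV_TOKEN)

    def add_token_to_namespace(self, token: str, namespace: str = "tokens") -> int:
        if token not in self._token_to_index[namespace]:
            index = len(self._token_to_index[namespace])
            self._token_to_index[namespace][token] = index
            self._index_to_token[namespace][index] = token
        return self._token_to_index[namespace][token]

    def get_token_index(self, token: str, namespace: str = "tokens") -> int:
        # Unknown tokens map to the OOV index, mirroring get_token_index above.
        return self._token_to_index[namespace].get(
            token, self._token_to_index[namespace][OOV_TOKEN])

vocab = TinyVocabulary()
vocab.add_token_to_namespace("the")
assert vocab.get_token_index("the") == 1
assert vocab.get_token_index("unseen-word") == vocab.get_token_index(OOV_TOKEN)
```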