This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

add travis stuff #3

Merged: 16 commits, Jun 28, 2017

Changes from 2 commits
17 changes: 8 additions & 9 deletions allennlp/common/params.py
@@ -52,12 +52,12 @@ class Params(MutableMapping):
# and passing no value to the default parameter of "pop".
DEFAULT = object()

def __init__(self, params: Dict[str, Any], history: str=""):
def __init__(self, params: Dict[str, Any], history: str = ""):
self.params = params
self.history = history

@overrides
def pop(self, key: str, default: Any=DEFAULT):
def pop(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.pop(key), along with checking for
returned dictionaries, replacing them with Param objects with an updated history.
@@ -76,7 +76,7 @@ def pop(self, key: str, default: Any=DEFAULT):
return self.__check_is_dict(key, value)

@overrides
def get(self, key: str, default: Any=DEFAULT):
def get(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.get(key) but also checks for returned
dicts and returns a Params object in their place with an updated history.
@@ -90,7 +90,7 @@ def get(self, key: str, default: Any=DEFAULT):
value = self.params.get(key, default)
return self.__check_is_dict(key, value)

def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool=False):
def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool = False):
"""
Gets the value of ``key`` in the ``params`` dictionary, ensuring that the value is one of
the given choices. Note that this `pops` the key from params, modifying the dictionary,
@@ -156,7 +156,7 @@ def assert_empty(self, class_name: str):
was one. ``class_name`` should be the name of the `calling` class, the one that got extra
parameters (if there are any).
"""
if len(self.params) != 0:
if self.params:
raise ConfigurationError("Extra parameters passed to {}: {}".format(class_name, self.params))

def __getitem__(self, key):
@@ -181,15 +181,14 @@ def __check_is_dict(self, new_history, value):
if isinstance(value, dict):
new_history = self.history + new_history + "."
return Params(value, new_history)
else:
return value
return value


def pop_choice(params: Dict[str, Any],
key: str,
choices: List[Any],
default_to_first_choice: bool=False,
history: str="?.") -> Any:
default_to_first_choice: bool = False,
history: str = "?.") -> Any:
"""
Performs the same function as :func:`Params.pop_choice`, but is required in order to deal with
places that the Params object is not welcome, such as inside Keras layers. See the docstring
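To make the Params API above concrete, here is a minimal usage sketch; the config dict and the class name passed to assert_empty are hypothetical, and the behavior follows the methods shown in this diff:

```python
from allennlp.common.params import Params

# Hypothetical config: "model" is a nested dict, so pop() wraps it in a new
# Params object whose history becomes "model.".
params = Params({"model": {"type": "lstm", "hidden_size": 128}})
model_params = params.pop("model")
model_type = model_params.pop_choice("type", ["lstm", "gru"])  # -> "lstm"
hidden_size = model_params.pop("hidden_size", 64)              # -> 128 (key present)
model_params.assert_empty("MyModel")  # all keys consumed, so no ConfigurationError
```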
2 changes: 1 addition & 1 deletion allennlp/common/tee_logger.py
@@ -24,7 +24,7 @@ def write(self, message):
# correctly, so we'll just make sure that each batch shows up on its one line.
if '\x08' in message:
message = message.replace('\x08', '')
if len(message) == 0 or message[-1] != '\n':
if message or message[-1] != '\n':
Contributor:
I think this one should be if not message or message[-1] != '\n':.

Contributor Author:
thanks

message += '\n'
self.log.write(message)

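To spell out the thread above: the reviewer's version is the correct translation of the original len(message) == 0 check. An illustrative trace, not part of the PR:

```python
# With the committed `if message or message[-1] != '\n':`
#   - an empty message is falsy, so `message[-1]` is evaluated -> IndexError
#   - a message already ending in '\n' is truthy, short-circuits to True,
#     and picks up a second trailing newline
# The reviewer's fix restores the intended guard:
message = ""
if not message or message[-1] != '\n':
    message += '\n'  # every message ends with exactly one trailing newline
assert message == '\n'
```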
11 changes: 5 additions & 6 deletions allennlp/common/util.py
@@ -20,8 +20,8 @@ def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[

def pad_sequence_to_length(sequence: List,
desired_length: int,
default_value: Callable[[], Any]=lambda: 0,
padding_on_right: bool=True) -> List:
default_value: Callable[[], Any] = lambda: 0,
padding_on_right: bool = True) -> List:
"""
Take a list of objects and pads it to the desired length, returning the padded list. The
original list is not modified.
@@ -73,8 +73,8 @@ def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -


def clean_layer_name(input_name: str,
strip_right_of_last_backslash: bool=True,
strip_numerics_after_underscores: bool=True):
strip_right_of_last_backslash: bool = True,
strip_numerics_after_underscores: bool = True):
"""
There exist cases when layer names need to be concatenated in order to create new, unique
layer names. However, the indices added to layer names designating the ith output of calling
@@ -114,5 +114,4 @@ def namespace_match(pattern: str, namespace: str):
return True
elif pattern == namespace:
return True
else:
return False
return False
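A quick sketch of the pad_sequence_to_length contract described above; the expected outputs assume the defaults shown in the signature:

```python
from allennlp.common.util import pad_sequence_to_length

padded = pad_sequence_to_length([1, 2, 3], desired_length=5)
assert padded == [1, 2, 3, 0, 0]       # default_value() == 0, padding on the right
padded_left = pad_sequence_to_length([1, 2, 3], 5, padding_on_right=False)
assert padded_left == [0, 0, 1, 2, 3]  # padding goes on the left instead
```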
4 changes: 2 additions & 2 deletions allennlp/data/data_generator.py
@@ -103,7 +103,7 @@ def __init__(self, text_trainer, params: Params):
#: this data.
self.last_num_batches = None

def create_generator(self, dataset: Dataset, batch_size: int=None):
def create_generator(self, dataset: Dataset, batch_size: int = None):
"""
Main external API call: converts an ``IndexedDataset`` into a data generator suitable for
use with Keras' ``fit_generator`` and related methods.
@@ -178,7 +178,7 @@ def __adaptive_grouping(self, instances: List[Instance]):
@staticmethod
def sort_dataset_by_padding(dataset: Dataset,
sorting_keys: List[Tuple[str, str]], # pylint: disable=invalid-sequence-index
padding_noise: float=0.0) -> List[Instance]:
padding_noise: float = 0.0) -> List[Instance]:
"""
Sorts the ``Instances`` in this ``Dataset`` by their padding lengths, using the keys in
``sorting_keys`` (in the order in which they are provided). ``sorting_keys`` is a list of
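The interaction of sorting_keys and padding_noise in sort_dataset_by_padding is easier to see standalone. This is a hypothetical re-sketch of the described behavior, not the PR's code:

```python
import random
from typing import Any, Dict, List, Tuple

def sort_by_padding_sketch(instances: List[Any],
                           sorting_keys: List[Tuple[str, str]],
                           padding_noise: float = 0.0) -> List[Any]:
    """Sort instances by padding lengths, optionally jittered by noise."""
    def sort_key(instance) -> Tuple[float, ...]:
        # Assumed shape: get_padding_lengths() -> {"text": {"num_tokens": 12}}
        lengths: Dict[str, Dict[str, int]] = instance.get_padding_lengths()
        values = []
        for field_name, padding_key in sorting_keys:
            value = float(lengths[field_name][padding_key])
            if padding_noise > 0.0:
                # Jitter so equal-length instances don't always batch together.
                value += value * random.uniform(-padding_noise, padding_noise)
            values.append(value)
        return tuple(values)
    return sorted(instances, key=sort_key)
```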
4 changes: 2 additions & 2 deletions allennlp/data/dataset.py
@@ -75,8 +75,8 @@ def get_padding_lengths(self) -> Dict[str, Dict[str, int]]:
return {**padding_lengths}

def as_arrays(self,
padding_lengths: Dict[str, Dict[str, int]]=None,
verbose: bool=True) -> Dict[str, List[numpy.array]]:
padding_lengths: Dict[str, Dict[str, int]] = None,
verbose: bool = True) -> Dict[str, List[numpy.array]]:
"""
This method converts this ``Dataset`` into a set of numpy arrays that can be passed through
a model. In order for the numpy arrays to be valid arrays, all ``Instances`` in this
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/language_modeling.py
@@ -37,9 +37,9 @@ class LanguageModelingReader(DatasetReader):
"""
def __init__(self,
filename: str,
tokens_per_instance: int=None,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokens_per_instance: int = None,
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._filename = filename
self._tokens_per_instance = tokens_per_instance
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/dataset_readers/snli.py
@@ -29,8 +29,8 @@ class SnliReader(DatasetReader):
"""
def __init__(self,
snli_filename: str,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._snli_filename = snli_filename
self._tokenizer = tokenizer
if token_indexers is None:
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/squad.py
@@ -49,9 +49,9 @@ class SquadSentenceSelectionReader(DatasetReader):
"""
def __init__(self,
squad_filename: str,
negative_sentence_selection: str="paragraph",
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
negative_sentence_selection: str = "paragraph",
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._squad_filename = squad_filename
self._negative_sentence_selection_methods = negative_sentence_selection.split(",")
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/fields/label_field.py
@@ -37,8 +37,8 @@ class LabelField(Field):
"""
def __init__(self,
label: Union[str, int],
label_namespace: str='labels',
num_labels: int=None):
label_namespace: str = 'labels',
num_labels: int = None):
self._label = label
self._label_namespace = label_namespace
if num_labels is None:
4 changes: 2 additions & 2 deletions allennlp/data/fields/list_field.py
@@ -60,8 +60,8 @@ def pad(self, padding_lengths: Dict[str, int]) -> List[numpy.array]:
padded_fields = [field.pad(padding_lengths) for field in padded_field_list]
if isinstance(padded_fields[0], (list, tuple)):
return [numpy.asarray(x) for x in zip(*padded_fields)]
else:
return [numpy.asarray(padded_fields)]

return [numpy.asarray(padded_fields)]

@overrides
def empty_field(self):
2 changes: 1 addition & 1 deletion allennlp/data/fields/tag_field.py
@@ -35,7 +35,7 @@ class TagField(Field):
integers for you, and this parameter tells the ``Vocabulary`` object which mapping from
strings to integers to use (so that "O" as a tag doesn't get the same id as "O" as a word).
"""
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str='tags'):
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str = 'tags'):
self._tags = tags
self._sequence_field = sequence_field
self._tag_namespace = tag_namespace
2 changes: 1 addition & 1 deletion allennlp/data/token_indexers/single_id_token_indexer.py
@@ -20,7 +20,7 @@ class SingleIdTokenIndexer(TokenIndexer):
If ``True``, we will call ``token.lower()`` before getting an index for the token from the
vocabulary.
"""
def __init__(self, token_namespace: str='tokens', lowercase_tokens: bool=False):
def __init__(self, token_namespace: str = 'tokens', lowercase_tokens: bool = False):
self.token_namespace = token_namespace
self.lowercase_tokens = lowercase_tokens

4 changes: 2 additions & 2 deletions allennlp/data/token_indexers/token_characters_indexer.py
@@ -26,8 +26,8 @@ class TokenCharactersIndexer(TokenIndexer):
retains casing.
"""
def __init__(self,
character_namespace: str='token_characters',
character_tokenizer: CharacterTokenizer=CharacterTokenizer()):
character_namespace: str = 'token_characters',
character_tokenizer: CharacterTokenizer = CharacterTokenizer()):
self.character_namespace = character_namespace
self.character_tokenizer = character_tokenizer

2 changes: 1 addition & 1 deletion allennlp/data/tokenizers/character_tokenizer.py
@@ -25,7 +25,7 @@ class CharacterTokenizer(Tokenizer):
operation. You probably do not want to do this, as character vocabularies are generally
not very large to begin with, but it's an option if you really want it.
"""
def __init__(self, byte_encoding: str=None, lowercase_characters: bool=False):
def __init__(self, byte_encoding: str = None, lowercase_characters: bool = False):
self.byte_encoding = byte_encoding
self.lowercase_characters = lowercase_characters

6 changes: 3 additions & 3 deletions allennlp/data/tokenizers/word_tokenizer.py
@@ -30,9 +30,9 @@ class WordTokenizer(Tokenizer):
The :class:`WordStemmer` to use. Default is no stemming.
"""
def __init__(self,
word_splitter: WordSplitter=SimpleWordSplitter(),
word_filter: WordFilter=PassThroughWordFilter(),
word_stemmer: WordStemmer=PassThroughWordStemmer()):
word_splitter: WordSplitter = SimpleWordSplitter(),
word_filter: WordFilter = PassThroughWordFilter(),
word_stemmer: WordStemmer = PassThroughWordStemmer()):
self.word_splitter = word_splitter
self.word_filter = word_filter
self.word_stemmer = word_stemmer
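A usage sketch for the pipeline above. It assumes the Tokenizer base class exposes a tokenize(text) method, which this diff does not show; with the pass-through defaults, only the splitter does any real work:

```python
from allennlp.data.tokenizers.word_tokenizer import WordTokenizer

tokenizer = WordTokenizer()  # split -> filter (no-op) -> stem (no-op)
tokens = tokenizer.tokenize("Travis builds are green now.")
# -> something like ["Travis", "builds", "are", "green", "now", "."]
```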
32 changes: 15 additions & 17 deletions allennlp/data/vocabulary.py
@@ -123,10 +123,10 @@ class Vocabulary:
label fields in this code), you don't have to specify anything here.
"""
def __init__(self,
counter: Dict[str, Dict[str, int]]=None,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None):
counter: Dict[str, Dict[str, int]] = None,
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None):
self._padding_token = "@@PADDING@@"
self._oov_token = "@@UNKOWN@@"
if non_padded_namespaces is None:
@@ -150,7 +150,7 @@ def __init__(self,
if count >= min_count:
self.add_token_to_namespace(token, namespace)

def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
def set_from_file(self, filename: str, oov_token: str, namespace: str = "tokens"):
"""
If you already have a vocabulary file for a trained model somewhere, and you really want to
use that vocabulary file instead of just setting the vocabulary from a dataset, for
@@ -182,9 +182,9 @@ def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
@classmethod
def from_dataset(cls,
dataset,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None) -> 'Vocabulary':
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None) -> 'Vocabulary':
"""
Constructs a vocabulary given a :class:`.Dataset` and some parameters. We count all of the
vocabulary items in the dataset, then pass those counts, and the other parameters, to
@@ -200,7 +200,7 @@ def from_dataset(cls,
max_vocab_size=max_vocab_size,
non_padded_namespaces=non_padded_namespaces)

def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
def add_token_to_namespace(self, token: str, namespace: str = 'tokens') -> int:
"""
Adds ``token`` to the index, if it is not already present. Either way, we return the index of
the token.
@@ -210,20 +210,18 @@ def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
self._token_to_index[namespace][token] = index
self._index_to_token[namespace][index] = token
return index
else:
return self._token_to_index[namespace][token]
return self._token_to_index[namespace][token]
Contributor:
I think I disagree with pylint on the readability of this particular error message, and I'm tempted to disable it. It's not a strong opinion, though - what do you think?

Contributor Author:
yep agreed, i'll disable it.

Contributor:
You should disable it in .pylintrc, not inline, by the way, in case you didn't know you could do that.


def get_index_to_token_vocabulary(self, namespace: str='tokens') -> Dict[int, str]:
def get_index_to_token_vocabulary(self, namespace: str = 'tokens') -> Dict[int, str]:
return self._index_to_token[namespace]

def get_token_index(self, token: str, namespace: str='tokens') -> int:
def get_token_index(self, token: str, namespace: str = 'tokens') -> int:
if token in self._token_to_index[namespace]:
return self._token_to_index[namespace][token]
else:
return self._token_to_index[namespace][self._oov_token]
return self._token_to_index[namespace][self._oov_token]

def get_token_from_index(self, index: int, namespace: str='tokens') -> str:
def get_token_from_index(self, index: int, namespace: str = 'tokens') -> str:
return self._index_to_token[namespace][index]

def get_vocab_size(self, namespace: str='tokens') -> int:
def get_vocab_size(self, namespace: str = 'tokens') -> int:
return len(self._token_to_index[namespace])
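A minimal sketch of the namespace and OOV behavior shown above, assuming the constructor defaults in this diff (including the @@UNKOWN@@ OOV token; the typo is in the source):

```python
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
cat_index = vocab.add_token_to_namespace("cat", namespace="tokens")
assert vocab.get_token_index("cat") == cat_index
# A token that was never added falls back to the OOV token's index:
assert vocab.get_token_index("zyzzyva") == vocab.get_token_index("@@UNKOWN@@")
# "labels" is a separate namespace, so its ids are independent of "tokens":
vocab.add_token_to_namespace("entailment", namespace="labels")
```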
18 changes: 9 additions & 9 deletions allennlp/layers/embeddings.py
@@ -46,13 +46,13 @@ class Embedding(torch.nn.Module):
def __init__(self,
num_embeddings: int,
embedding_dim: int,
weight: torch.FloatTensor=None,
padding_index: int=None,
trainable: bool=True,
max_norm: float=None,
norm_type: float=2.,
scale_grad_by_freq: bool=False,
sparse: bool=False):
weight: torch.FloatTensor = None,
padding_index: int = None,
trainable: bool = True,
max_norm: float = None,
norm_type: float = 2.,
scale_grad_by_freq: bool = False,
sparse: bool = False):
super(Embedding, self).__init__()
self.num_embeddings = num_embeddings
self.embedding_dim = embedding_dim
@@ -85,8 +85,8 @@ def forward(self, inputs): # pylint: disable=arguments-differ

def get_pretrained_embedding_layer(embeddings_filename: str,
vocab: Vocabulary,
namespace: str="tokens",
trainable: bool=True):
namespace: str = "tokens",
trainable: bool = True):
"""
Reads a pre-trained embedding file and generates an Embedding layer that has weights
initialized to the pre-trained embeddings. The Embedding layer can either be trainable or
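Hypothetical usage of get_pretrained_embedding_layer; the GloVe filename and the freshly built vocabulary are stand-ins for illustration:

```python
from allennlp.data.vocabulary import Vocabulary
from allennlp.layers.embeddings import get_pretrained_embedding_layer

vocab = Vocabulary()  # in practice, built from a dataset via Vocabulary.from_dataset
embedding = get_pretrained_embedding_layer("glove.6B.100d.txt",  # hypothetical path
                                           vocab,
                                           namespace="tokens",
                                           trainable=False)      # freeze the weights
```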
6 changes: 4 additions & 2 deletions requirements.txt
@@ -37,8 +37,8 @@ awscli>=1.11.91

#### TESTING-RELATED PACKAGES ####

# Checks style, syntax, and other useful errors
pylint==1.6.5
# Checks style, syntax, and other useful errors.
# We need this commit because of https://github.com/PyCQA/pylint/pull/1430,
# unpin when pylint 1.7.3 is released.
git+git://github.com/PyCQA/pylint.git@2561f539d60a3563d6507e7a22e226fb10b58210

# We'll use pytest to run our tests; this isn't really necessary to run the code, but it is to run
# the tests. With this here, you can run the tests with `py.test` from the base directory.
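For reference, once pylint 1.7.3 ships, the commit pin above can presumably go back to a plain specifier:

```
# After the pylint 1.7.3 release (assumption):
pylint==1.7.3
```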