This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

add travis stuff #3

Merged: 16 commits, Jun 28, 2017

Changes from 2 commits
17 changes: 8 additions & 9 deletions allennlp/common/params.py
@@ -52,12 +52,12 @@ class Params(MutableMapping):
# and passing no value to the default parameter of "pop".
DEFAULT = object()

def __init__(self, params: Dict[str, Any], history: str=""):
def __init__(self, params: Dict[str, Any], history: str = ""):
self.params = params
self.history = history

@overrides
def pop(self, key: str, default: Any=DEFAULT):
def pop(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.pop(key), along with checking for
returned dictionaries, replacing them with Param objects with an updated history.
@@ -76,7 +76,7 @@ def pop(self, key: str, default: Any=DEFAULT):
return self.__check_is_dict(key, value)

@overrides
def get(self, key: str, default: Any=DEFAULT):
def get(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.get(key) but also checks for returned
dicts and returns a Params object in their place with an updated history.
@@ -90,7 +90,7 @@ def get(self, key: str, default: Any=DEFAULT):
value = self.params.get(key, default)
return self.__check_is_dict(key, value)

def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool=False):
def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool = False):
"""
Gets the value of ``key`` in the ``params`` dictionary, ensuring that the value is one of
the given choices. Note that this `pops` the key from params, modifying the dictionary,
@@ -156,7 +156,7 @@ def assert_empty(self, class_name: str):
was one. ``class_name`` should be the name of the `calling` class, the one that got extra
parameters (if there are any).
"""
if len(self.params) != 0:
if self.params:
raise ConfigurationError("Extra parameters passed to {}: {}".format(class_name, self.params))

def __getitem__(self, key):
@@ -181,15 +181,14 @@ def __check_is_dict(self, new_history, value):
if isinstance(value, dict):
new_history = self.history + new_history + "."
return Params(value, new_history)
else:
return value
return value


def pop_choice(params: Dict[str, Any],
key: str,
choices: List[Any],
default_to_first_choice: bool=False,
history: str="?.") -> Any:
default_to_first_choice: bool = False,
history: str = "?.") -> Any:
"""
Performs the same function as :func:`Params.pop_choice`, but is required in order to deal with
places that the Params object is not welcome, such as inside Keras layers. See the docstring
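To make the Params API above concrete, here is a minimal usage sketch; the config dict and the class name passed to assert_empty are hypothetical, and the behavior follows the methods shown in this diff:

```python
from allennlp.common.params import Params

# Hypothetical config: "model" is a nested dict, so pop() wraps it in a new
# Params object whose history becomes "model.".
params = Params({"model": {"type": "lstm", "hidden_size": 128}})
model_params = params.pop("model")
model_type = model_params.pop_choice("type", ["lstm", "gru"])  # -> "lstm"
hidden_size = model_params.pop("hidden_size", 64)              # -> 128 (key present)
model_params.assert_empty("MyModel")  # all keys consumed, so no ConfigurationError
```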
2 changes: 1 addition & 1 deletion allennlp/common/tee_logger.py
@@ -24,7 +24,7 @@ def write(self, message):
# correctly, so we'll just make sure that each batch shows up on its one line.
if '\x08' in message:
message = message.replace('\x08', '')
if len(message) == 0 or message[-1] != '\n':
if message or message[-1] != '\n':
Contributor:
I think this one should be if not message or message[-1] != '\n':.

Contributor Author:
thanks

message += '\n'
self.log.write(message)

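To spell out the thread above: the reviewer's version is the correct translation of the original len(message) == 0 check. An illustrative trace, not part of the PR:

```python
# With the committed `if message or message[-1] != '\n':`
#   - an empty message is falsy, so `message[-1]` is evaluated -> IndexError
#   - a message already ending in '\n' is truthy, short-circuits to True,
#     and picks up a second trailing newline
# The reviewer's fix restores the intended guard:
message = ""
if not message or message[-1] != '\n':
    message += '\n'  # every message ends with exactly one trailing newline
assert message == '\n'
```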
11 changes: 5 additions & 6 deletions allennlp/common/util.py
@@ -20,8 +20,8 @@ def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[

def pad_sequence_to_length(sequence: List,
desired_length: int,
default_value: Callable[[], Any]=lambda: 0,
padding_on_right: bool=True) -> List:
default_value: Callable[[], Any] = lambda: 0,
padding_on_right: bool = True) -> List:
"""
Take a list of objects and pads it to the desired length, returning the padded list. The
original list is not modified.
@@ -73,8 +73,8 @@ def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -


def clean_layer_name(input_name: str,
strip_right_of_last_backslash: bool=True,
strip_numerics_after_underscores: bool=True):
strip_right_of_last_backslash: bool = True,
strip_numerics_after_underscores: bool = True):
"""
There exist cases when layer names need to be concatenated in order to create new, unique
layer names. However, the indices added to layer names designating the ith output of calling
@@ -114,5 +114,4 @@ def namespace_match(pattern: str, namespace: str):
return True
elif pattern == namespace:
return True
else:
return False
return False
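A quick sketch of the pad_sequence_to_length contract described above; the expected outputs assume the defaults shown in the signature:

```python
from allennlp.common.util import pad_sequence_to_length

padded = pad_sequence_to_length([1, 2, 3], desired_length=5)
assert padded == [1, 2, 3, 0, 0]       # default_value() == 0, padding on the right
padded_left = pad_sequence_to_length([1, 2, 3], 5, padding_on_right=False)
assert padded_left == [0, 0, 1, 2, 3]  # padding goes on the left instead
```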
4 changes: 2 additions & 2 deletions allennlp/data/data_generator.py
@@ -103,7 +103,7 @@ def __init__(self, text_trainer, params: Params):
#: this data.
self.last_num_batches = None

def create_generator(self, dataset: Dataset, batch_size: int=None):
def create_generator(self, dataset: Dataset, batch_size: int = None):
"""
Main external API call: converts an ``IndexedDataset`` into a data generator suitable for
use with Keras' ``fit_generator`` and related methods.
@@ -178,7 +178,7 @@ def __adaptive_grouping(self, instances: List[Instance]):
@staticmethod
def sort_dataset_by_padding(dataset: Dataset,
sorting_keys: List[Tuple[str, str]], # pylint: disable=invalid-sequence-index
padding_noise: float=0.0) -> List[Instance]:
padding_noise: float = 0.0) -> List[Instance]:
"""
Sorts the ``Instances`` in this ``Dataset`` by their padding lengths, using the keys in
``sorting_keys`` (in the order in which they are provided). ``sorting_keys`` is a list of
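The interaction of sorting_keys and padding_noise in sort_dataset_by_padding is easier to see standalone. This is a hypothetical re-sketch of the described behavior, not the PR's code:

```python
import random
from typing import Any, Dict, List, Tuple

def sort_by_padding_sketch(instances: List[Any],
                           sorting_keys: List[Tuple[str, str]],
                           padding_noise: float = 0.0) -> List[Any]:
    """Sort instances by padding lengths, optionally jittered by noise."""
    def sort_key(instance) -> Tuple[float, ...]:
        # Assumed shape: get_padding_lengths() -> {"text": {"num_tokens": 12}}
        lengths: Dict[str, Dict[str, int]] = instance.get_padding_lengths()
        values = []
        for field_name, padding_key in sorting_keys:
            value = float(lengths[field_name][padding_key])
            if padding_noise > 0.0:
                # Jitter so equal-length instances don't always batch together.
                value += value * random.uniform(-padding_noise, padding_noise)
            values.append(value)
        return tuple(values)
    return sorted(instances, key=sort_key)
```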
4 changes: 2 additions & 2 deletions allennlp/data/dataset.py
@@ -75,8 +75,8 @@ def get_padding_lengths(self) -> Dict[str, Dict[str, int]]:
return {**padding_lengths}

def as_arrays(self,
padding_lengths: Dict[str, Dict[str, int]]=None,
verbose: bool=True) -> Dict[str, List[numpy.array]]:
padding_lengths: Dict[str, Dict[str, int]] = None,
verbose: bool = True) -> Dict[str, List[numpy.array]]:
"""
This method converts this ``Dataset`` into a set of numpy arrays that can be passed through
a model. In order for the numpy arrays to be valid arrays, all ``Instances`` in this
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/language_modeling.py
@@ -37,9 +37,9 @@ class LanguageModelingReader(DatasetReader):
"""
def __init__(self,
filename: str,
tokens_per_instance: int=None,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokens_per_instance: int = None,
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._filename = filename
self._tokens_per_instance = tokens_per_instance
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/dataset_readers/snli.py
@@ -29,8 +29,8 @@ class SnliReader(DatasetReader):
"""
def __init__(self,
snli_filename: str,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._snli_filename = snli_filename
self._tokenizer = tokenizer
if token_indexers is None:
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/squad.py
@@ -49,9 +49,9 @@ class SquadSentenceSelectionReader(DatasetReader):
"""
def __init__(self,
squad_filename: str,
negative_sentence_selection: str="paragraph",
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
negative_sentence_selection: str = "paragraph",
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._squad_filename = squad_filename
self._negative_sentence_selection_methods = negative_sentence_selection.split(",")
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/fields/label_field.py
@@ -37,8 +37,8 @@ class LabelField(Field):
"""
def __init__(self,
label: Union[str, int],
label_namespace: str='labels',
num_labels: int=None):
label_namespace: str = 'labels',
num_labels: int = None):
self._label = label
self._label_namespace = label_namespace
if num_labels is None:
4 changes: 2 additions & 2 deletions allennlp/data/fields/list_field.py
@@ -60,8 +60,8 @@ def pad(self, padding_lengths: Dict[str, int]) -> List[numpy.array]:
padded_fields = [field.pad(padding_lengths) for field in padded_field_list]
if isinstance(padded_fields[0], (list, tuple)):
return [numpy.asarray(x) for x in zip(*padded_fields)]
else:
return [numpy.asarray(padded_fields)]

return [numpy.asarray(padded_fields)]

@overrides
def empty_field(self):
2 changes: 1 addition & 1 deletion allennlp/data/fields/tag_field.py
@@ -35,7 +35,7 @@ class TagField(Field):
integers for you, and this parameter tells the ``Vocabulary`` object which mapping from
strings to integers to use (so that "O" as a tag doesn't get the same id as "O" as a word).
"""
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str='tags'):
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str = 'tags'):
self._tags = tags
self._sequence_field = sequence_field
self._tag_namespace = tag_namespace
2 changes: 1 addition & 1 deletion allennlp/data/token_indexers/single_id_token_indexer.py
@@ -20,7 +20,7 @@ class SingleIdTokenIndexer(TokenIndexer):
If ``True``, we will call ``token.lower()`` before getting an index for the token from the
vocabulary.
"""
def __init__(self, token_namespace: str='tokens', lowercase_tokens: bool=False):
def __init__(self, token_namespace: str = 'tokens', lowercase_tokens: bool = False):
self.token_namespace = token_namespace
self.lowercase_tokens = lowercase_tokens

4 changes: 2 additions & 2 deletions allennlp/data/token_indexers/token_characters_indexer.py
@@ -26,8 +26,8 @@ class TokenCharactersIndexer(TokenIndexer):
retains casing.
"""
def __init__(self,
character_namespace: str='token_characters',
character_tokenizer: CharacterTokenizer=CharacterTokenizer()):
character_namespace: str = 'token_characters',
character_tokenizer: CharacterTokenizer = CharacterTokenizer()):
self.character_namespace = character_namespace
self.character_tokenizer = character_tokenizer

2 changes: 1 addition & 1 deletion allennlp/data/tokenizers/character_tokenizer.py
@@ -25,7 +25,7 @@ class CharacterTokenizer(Tokenizer):
operation. You probably do not want to do this, as character vocabularies are generally
not very large to begin with, but it's an option if you really want it.
"""
def __init__(self, byte_encoding: str=None, lowercase_characters: bool=False):
def __init__(self, byte_encoding: str = None, lowercase_characters: bool = False):
self.byte_encoding = byte_encoding
self.lowercase_characters = lowercase_characters

6 changes: 3 additions & 3 deletions allennlp/data/tokenizers/word_tokenizer.py
@@ -30,9 +30,9 @@ class WordTokenizer(Tokenizer):
The :class:`WordStemmer` to use. Default is no stemming.
"""
def __init__(self,
word_splitter: WordSplitter=SimpleWordSplitter(),
word_filter: WordFilter=PassThroughWordFilter(),
word_stemmer: WordStemmer=PassThroughWordStemmer()):
word_splitter: WordSplitter = SimpleWordSplitter(),
word_filter: WordFilter = PassThroughWordFilter(),
word_stemmer: WordStemmer = PassThroughWordStemmer()):
self.word_splitter = word_splitter
self.word_filter = word_filter
self.word_stemmer = word_stemmer
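A usage sketch for the pipeline above. It assumes the Tokenizer base class exposes a tokenize(text) method, which this diff does not show; with the pass-through defaults, only the splitter does any real work:

```python
from allennlp.data.tokenizers.word_tokenizer import WordTokenizer

tokenizer = WordTokenizer()  # split -> filter (no-op) -> stem (no-op)
tokens = tokenizer.tokenize("Travis builds are green now.")
# -> something like ["Travis", "builds", "are", "green", "now", "."]
```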
32 changes: 15 additions & 17 deletions allennlp/data/vocabulary.py
@@ -123,10 +123,10 @@ class Vocabulary:
label fields in this code), you don't have to specify anything here.
"""
def __init__(self,
counter: Dict[str, Dict[str, int]]=None,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None):
counter: Dict[str, Dict[str, int]] = None,
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None):
self._padding_token = "@@PADDING@@"
self._oov_token = "@@UNKOWN@@"
if non_padded_namespaces is None:
@@ -150,7 +150,7 @@ def __init__(self,
if count >= min_count:
self.add_token_to_namespace(token, namespace)

def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
def set_from_file(self, filename: str, oov_token: str, namespace: str = "tokens"):
"""
If you already have a vocabulary file for a trained model somewhere, and you really want to
use that vocabulary file instead of just setting the vocabulary from a dataset, for
@@ -182,9 +182,9 @@ def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
@classmethod
def from_dataset(cls,
dataset,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None) -> 'Vocabulary':
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None) -> 'Vocabulary':
"""
Constructs a vocabulary given a :class:`.Dataset` and some parameters. We count all of the
vocabulary items in the dataset, then pass those counts, and the other parameters, to
@@ -200,7 +200,7 @@ def from_dataset(cls,
max_vocab_size=max_vocab_size,
non_padded_namespaces=non_padded_namespaces)

def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
def add_token_to_namespace(self, token: str, namespace: str = 'tokens') -> int:
"""
Adds ``token`` to the index, if it is not already present. Either way, we return the index of
the token.
@@ -210,20 +210,18 @@ def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
self._token_to_index[namespace][token] = index
self._index_to_token[namespace][index] = token
return index
else:
return self._token_to_index[namespace][token]
return self._token_to_index[namespace][token]
Contributor:
I think I disagree with pylint on the readability of this particular error message, and I'm tempted to disable it. It's not a strong opinion, though - what do you think?

Contributor Author:
yep agreed, i'll disable it.

Contributor:
You should disable it in .pylintrc, not inline, by the way, in case you didn't know you could do that.


def get_index_to_token_vocabulary(self, namespace: str='tokens') -> Dict[int, str]:
def get_index_to_token_vocabulary(self, namespace: str = 'tokens') -> Dict[int, str]:
return self._index_to_token[namespace]

def get_token_index(self, token: str, namespace: str='tokens') -> int:
def get_token_index(self, token: str, namespace: str = 'tokens') -> int:
if token in self._token_to_index[namespace]:
return self._token_to_index[namespace][token]
else:
return self._token_to_index[namespace][self._oov_token]
return self._token_to_index[namespace][self._oov_token]

def get_token_from_index(self, index: int, namespace: str='tokens') -> str:
def get_token_from_index(self, index: int, namespace: str = 'tokens') -> str:
return self._index_to_token[namespace][index]

def get_vocab_size(self, namespace: str='tokens') -> int:
def get_vocab_size(self, namespace: str = 'tokens') -> int:
return len(self._token_to_index[namespace])
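A minimal sketch of the namespace and OOV behavior shown above, assuming the constructor defaults in this diff (including the @@UNKOWN@@ OOV token; the typo is in the source):

```python
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
cat_index = vocab.add_token_to_namespace("cat", namespace="tokens")
assert vocab.get_token_index("cat") == cat_index
# A token that was never added falls back to the OOV token's index:
assert vocab.get_token_index("zyzzyva") == vocab.get_token_index("@@UNKOWN@@")
# "labels" is a separate namespace, so its ids are independent of "tokens":
vocab.add_token_to_namespace("entailment", namespace="labels")
```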
18 changes: 9 additions & 9 deletions allennlp/layers/embeddings.py
@@ -46,13 +46,13 @@ class Embedding(torch.nn.Module):
def __init__(self,
num_embeddings: int,
embedding_dim: int,
weight: torch.FloatTensor=None,
padding_index: int=None,
trainable: bool=True,
max_norm: float=None,
norm_type: float=2.,
scale_grad_by_freq: bool=False,
sparse: bool=False):
weight: torch.FloatTensor = None,
padding_index: int = None,
trainable: bool = True,
max_norm: float = None,
norm_type: float = 2.,
scale_grad_by_freq: bool = False,
sparse: bool = False):
super(Embedding, self).__init__()
self.num_embeddings = num_embeddings
self.embedding_dim = embedding_dim
@@ -85,8 +85,8 @@ def forward(self, inputs): # pylint: disable=arguments-differ

def get_pretrained_embedding_layer(embeddings_filename: str,
vocab: Vocabulary,
namespace: str="tokens",
trainable: bool=True):
namespace: str = "tokens",
trainable: bool = True):
"""
Reads a pre-trained embedding file and generates an Embedding layer that has weights
initialized to the pre-trained embeddings. The Embedding layer can either be trainable or
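Hypothetical usage of get_pretrained_embedding_layer; the GloVe filename and the freshly built vocabulary are stand-ins for illustration:

```python
from allennlp.data.vocabulary import Vocabulary
from allennlp.layers.embeddings import get_pretrained_embedding_layer

vocab = Vocabulary()  # in practice, built from a dataset via Vocabulary.from_dataset
embedding = get_pretrained_embedding_layer("glove.6B.100d.txt",  # hypothetical path
                                           vocab,
                                           namespace="tokens",
                                           trainable=False)      # freeze the weights
```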
6 changes: 4 additions & 2 deletions requirements.txt
@@ -37,8 +37,8 @@ awscli>=1.11.91

#### TESTING-RELATED PACKAGES ####

# Checks style, syntax, and other useful errors
pylint==1.6.5
# Checks style, syntax, and other useful errors.
# We need this commit because of https://github.com/PyCQA/pylint/pull/1430,
# unpin when pylint 1.7.3 is released.
git+git://github.com/PyCQA/pylint.git@2561f539d60a3563d6507e7a22e226fb10b58210

# We'll use pytest to run our tests; this isn't really necessary to run the code, but it is to run
# the tests. With this here, you can run the tests with `py.test` from the base directory.
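For reference, once pylint 1.7.3 ships, the commit pin above can presumably go back to a plain specifier:

```
# After the pylint 1.7.3 release (assumption):
pylint==1.7.3
```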