This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

add travis stuff #3

Merged
merged 16 commits on Jun 28, 2017
2 changes: 1 addition & 1 deletion .pylintrc
@@ -65,7 +65,7 @@ confidence=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use"--disable=all --enable=classes
# --disable=W"
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,missing-docstring,too-many-arguments,too-many-locals,too-many-statements,too-many-branches,too-many-nested-blocks,too-many-instance-attributes,fixme,too-few-public-methods
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,missing-docstring,too-many-arguments,too-many-locals,too-many-statements,too-many-branches,too-many-nested-blocks,too-many-instance-attributes,fixme,too-few-public-methods,no-else-return


[REPORTS]
26 changes: 26 additions & 0 deletions .travis.yml
@@ -0,0 +1,26 @@
# use new container-based travis workers
sudo: false
dist: trusty

language: python

cache:
directories:
- /home/travis/nltk_data
- /home/travis/download
- /home/travis/miniconda3
- /home/travis/.cache/pip

env:
matrix:
# run tests, submit coverage info
- COVERAGE="true"
# pylint checks skip the tests
- RUN_PYLINT="true" SKIP_TESTS="true"

notifications:
email: false

install: source build_tools/travis/install.sh
script: bash build_tools/travis/test_script.sh
after_success: source build_tools/travis/after_success.sh
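
Note: the install, test, and coverage-upload steps above delegate to scripts under `build_tools/travis/` that are not part of this diff. As a rough, hypothetical illustration of how the `COVERAGE` / `RUN_PYLINT` / `SKIP_TESTS` matrix variables could drive the test step, here is a Python-flavoured sketch; the real `test_script.sh` is a shell script and may differ:

```python
# Hypothetical sketch only: the real build_tools/travis/test_script.sh is not in this diff.
import os
import subprocess
import sys

def main() -> int:
    run_pylint = os.environ.get("RUN_PYLINT") == "true"
    skip_tests = os.environ.get("SKIP_TESTS") == "true"
    coverage = os.environ.get("COVERAGE") == "true"

    if run_pylint:
        # Lint the package with the repository's .pylintrc (pylint checks skip the tests).
        return subprocess.call(["pylint", "--rcfile=.pylintrc", "allennlp"])
    if not skip_tests:
        cmd = ["pytest"]
        if coverage:
            # Assumes pytest-cov is installed; coverage is uploaded to codecov afterwards.
            cmd += ["--cov=allennlp"]
        return subprocess.call(cmd)
    return 0

if __name__ == "__main__":
    sys.exit(main())
```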
2 changes: 2 additions & 0 deletions README.md
@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/allenai/allennlp.svg?branch=master)](https://travis-ci.org/allenai/allennlp)
[![codecov](https://codecov.io/gh/allenai/allennlp/branch/master/graph/badge.svg)](https://codecov.io/gh/allenai/allennlp)
# AllenNLP

A [Apache 2.0](https://github.com/allenai/allennlp/blob/master/LICENSE) natural language processing toolkit using state-of-the-art deep learning models.
17 changes: 8 additions & 9 deletions allennlp/common/params.py
@@ -52,12 +52,12 @@ class Params(MutableMapping):
# and passing no value to the default parameter of "pop".
DEFAULT = object()

def __init__(self, params: Dict[str, Any], history: str=""):
def __init__(self, params: Dict[str, Any], history: str = ""):
self.params = params
self.history = history

@overrides
def pop(self, key: str, default: Any=DEFAULT):
def pop(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.pop(key), along with checking for
returned dictionaries, replacing them with Param objects with an updated history.
@@ -76,7 +76,7 @@ def pop(self, key: str, default: Any=DEFAULT):
return self.__check_is_dict(key, value)

@overrides
def get(self, key: str, default: Any=DEFAULT):
def get(self, key: str, default: Any = DEFAULT):
"""
Performs the functionality associated with dict.get(key) but also checks for returned
dicts and returns a Params object in their place with an updated history.
@@ -90,7 +90,7 @@ def get(self, key: str, default: Any=DEFAULT):
value = self.params.get(key, default)
return self.__check_is_dict(key, value)

def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool=False):
def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool = False):
"""
Gets the value of ``key`` in the ``params`` dictionary, ensuring that the value is one of
the given choices. Note that this `pops` the key from params, modifying the dictionary,
@@ -156,7 +156,7 @@ def assert_empty(self, class_name: str):
was one. ``class_name`` should be the name of the `calling` class, the one that got extra
parameters (if there are any).
"""
if len(self.params) != 0:
if self.params:
raise ConfigurationError("Extra parameters passed to {}: {}".format(class_name, self.params))

def __getitem__(self, key):
@@ -181,15 +181,14 @@ def __check_is_dict(self, new_history, value):
if isinstance(value, dict):
new_history = self.history + new_history + "."
return Params(value, new_history)
else:
return value
return value


def pop_choice(params: Dict[str, Any],
key: str,
choices: List[Any],
default_to_first_choice: bool=False,
history: str="?.") -> Any:
default_to_first_choice: bool = False,
history: str = "?.") -> Any:
"""
Performs the same function as :func:`Params.pop_choice`, but is required in order to deal with
places that the Params object is not welcome, such as inside Keras layers. See the docstring
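
Taken together with the docstrings above, a minimal usage sketch of `Params` (the import path is assumed from the file location `allennlp/common/params.py`):

```python
# Minimal sketch of the behaviour described in the docstrings above; the import
# path is an assumption based on the file location shown in this diff.
from allennlp.common.params import Params

params = Params({"model": {"type": "snli", "num_layers": 2}}, history="experiment.")

model_params = params.pop("model")        # nested dicts come back as Params objects
assert isinstance(model_params, Params)   # with history "experiment.model."

model_type = model_params.pop_choice("type", choices=["snli", "squad"])
num_layers = model_params.pop("num_layers")

# assert_empty raises ConfigurationError if any unexpected keys are left over.
model_params.assert_empty("MyModel")
```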
2 changes: 1 addition & 1 deletion allennlp/common/tee_logger.py
@@ -24,7 +24,7 @@ def write(self, message):
# correctly, so we'll just make sure that each batch shows up on its one line.
if '\x08' in message:
message = message.replace('\x08', '')
if len(message) == 0 or message[-1] != '\n':
if not message or message[-1] != '\n':
message += '\n'
self.log.write(message)

11 changes: 5 additions & 6 deletions allennlp/common/util.py
@@ -20,8 +20,8 @@ def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[

def pad_sequence_to_length(sequence: List,
desired_length: int,
default_value: Callable[[], Any]=lambda: 0,
padding_on_right: bool=True) -> List:
default_value: Callable[[], Any] = lambda: 0,
padding_on_right: bool = True) -> List:
"""
Take a list of objects and pads it to the desired length, returning the padded list. The
original list is not modified.
@@ -73,8 +73,8 @@ def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -


def clean_layer_name(input_name: str,
strip_right_of_last_backslash: bool=True,
strip_numerics_after_underscores: bool=True):
strip_right_of_last_backslash: bool = True,
strip_numerics_after_underscores: bool = True):
"""
There exist cases when layer names need to be concatenated in order to create new, unique
layer names. However, the indices added to layer names designating the ith output of calling
@@ -114,5 +114,4 @@ def namespace_match(pattern: str, namespace: str):
return True
elif pattern == namespace:
return True
else:
return False
return False
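
For reference, a short sketch of the two helpers touched above, based on their docstrings (import path assumed from the file location `allennlp/common/util.py`):

```python
# Sketch based on the docstrings above; the import path is an assumption.
from allennlp.common.util import namespace_match, pad_sequence_to_length

# Pads a list to the desired length without modifying the original.
assert pad_sequence_to_length([1, 2, 3], 5) == [1, 2, 3, 0, 0]
assert pad_sequence_to_length([1, 2, 3], 5, padding_on_right=False) == [0, 0, 1, 2, 3]

# namespace_match returns True on an exact match; the leading-'*' wildcard case
# sits in the first branch of the function, which is outside this hunk.
assert namespace_match("tokens", "tokens")
assert not namespace_match("tokens", "characters")
```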
6 changes: 3 additions & 3 deletions allennlp/data/data_generator.py
@@ -103,7 +103,7 @@ def __init__(self, text_trainer, params: Params):
#: this data.
self.last_num_batches = None

def create_generator(self, dataset: Dataset, batch_size: int=None):
def create_generator(self, dataset: Dataset, batch_size: int = None):
"""
Main external API call: converts an ``IndexedDataset`` into a data generator suitable for
use with Keras' ``fit_generator`` and related methods.
@@ -177,8 +177,8 @@ def __adaptive_grouping(self, instances: List[Instance]):

@staticmethod
def sort_dataset_by_padding(dataset: Dataset,
sorting_keys: List[Tuple[str, str]],
padding_noise: float=0.0) -> List[Instance]:
sorting_keys: List[Tuple[str, str]], # pylint: disable=invalid-sequence-index
padding_noise: float = 0.0) -> List[Instance]:
"""
Sorts the ``Instances`` in this ``Dataset`` by their padding lengths, using the keys in
``sorting_keys`` (in the order in which they are provided). ``sorting_keys`` is a list of
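
`sort_dataset_by_padding` orders instances by their padding lengths, with `padding_noise` jittering the lengths so batches are not always grouped identically. A standalone sketch of the idea, using plain dicts instead of the real `Dataset`/`Instance` classes:

```python
# Standalone illustration of sorting by (noisy) padding lengths; this mimics the
# idea behind sort_dataset_by_padding but does not use the real allennlp classes.
import random
from typing import Dict, List, Tuple

def add_noise(lengths: Dict[str, int], noise_param: float) -> Dict[str, float]:
    # Scale each length by a factor drawn uniformly from [1 - noise, 1 + noise].
    return {key: value * (1.0 + noise_param * (2 * random.random() - 1.0))
            for key, value in lengths.items()}

def sort_by_padding(instances: List[Dict[str, Dict[str, int]]],
                    sorting_keys: List[Tuple[str, str]],
                    padding_noise: float = 0.0) -> List[Dict[str, Dict[str, int]]]:
    def sort_key(instance):
        noisy = {field: add_noise(keys, padding_noise) for field, keys in instance.items()}
        return tuple(noisy[field][key] for field, key in sorting_keys)
    return sorted(instances, key=sort_key)

instances = [{"question": {"num_tokens": 12}},
             {"question": {"num_tokens": 5}},
             {"question": {"num_tokens": 9}}]
ordered = sort_by_padding(instances, [("question", "num_tokens")], padding_noise=0.1)
```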
4 changes: 2 additions & 2 deletions allennlp/data/dataset.py
@@ -75,8 +75,8 @@ def get_padding_lengths(self) -> Dict[str, Dict[str, int]]:
return {**padding_lengths}

def as_arrays(self,
padding_lengths: Dict[str, Dict[str, int]]=None,
verbose: bool=True) -> Dict[str, List[numpy.array]]:
padding_lengths: Dict[str, Dict[str, int]] = None,
verbose: bool = True) -> Dict[str, List[numpy.array]]:
"""
This method converts this ``Dataset`` into a set of numpy arrays that can be passed through
a model. In order for the numpy arrays to be valid arrays, all ``Instances`` in this
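
`as_arrays` pads every instance to common lengths and returns numpy arrays keyed by field name. A simplified, standalone sketch of that padding step (not the real `Dataset` API):

```python
# Simplified standalone sketch of the padding step behind Dataset.as_arrays;
# real instances contain Fields rather than raw token-id lists.
from typing import Dict, List
import numpy

def pad_and_stack(token_ids: List[List[int]],
                  padding_lengths: Dict[str, int] = None) -> numpy.ndarray:
    if padding_lengths is None:
        padding_lengths = {"num_tokens": max(len(ids) for ids in token_ids)}
    num_tokens = padding_lengths["num_tokens"]
    padded = [ids[:num_tokens] + [0] * (num_tokens - len(ids)) for ids in token_ids]
    return numpy.asarray(padded, dtype=numpy.int64)

batch = pad_and_stack([[2, 5, 7], [4], [9, 9, 9, 3]])
# batch.shape == (3, 4); shorter sequences are right-padded with zeros.
```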
6 changes: 3 additions & 3 deletions allennlp/data/dataset_readers/language_modeling.py
@@ -37,9 +37,9 @@ class LanguageModelingReader(DatasetReader):
"""
def __init__(self,
filename: str,
tokens_per_instance: int=None,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokens_per_instance: int = None,
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._filename = filename
self._tokens_per_instance = tokens_per_instance
self._tokenizer = tokenizer
4 changes: 2 additions & 2 deletions allennlp/data/dataset_readers/snli.py
@@ -29,8 +29,8 @@ class SnliReader(DatasetReader):
"""
def __init__(self,
snli_filename: str,
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._snli_filename = snli_filename
self._tokenizer = tokenizer
if token_indexers is None:
10 changes: 6 additions & 4 deletions allennlp/data/dataset_readers/squad.py
@@ -49,9 +49,9 @@ class SquadSentenceSelectionReader(DatasetReader):
"""
def __init__(self,
squad_filename: str,
negative_sentence_selection: str="paragraph",
tokenizer: Tokenizer=WordTokenizer(),
token_indexers: List[TokenIndexer]=None):
negative_sentence_selection: str = "paragraph",
tokenizer: Tokenizer = WordTokenizer(),
token_indexers: List[TokenIndexer] = None):
self._squad_filename = squad_filename
self._negative_sentence_selection_methods = negative_sentence_selection.split(",")
self._tokenizer = tokenizer
@@ -73,7 +73,9 @@ def __init__(self,
# Maps question indices to question strings
self._id_to_question = {}

def _get_sentence_choices(self, question_id: int, answer_id: int) -> Tuple[List[str], int]:
def _get_sentence_choices(self,
question_id: int,
answer_id: int) -> Tuple[List[str], int]: # pylint: disable=invalid-sequence-index
# Because sentences and questions have different indices, we need this to hold tuples of
# ("sentence", id) or ("question", id), instead of just single ids.
negative_sentences = set()
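
The comment in `_get_sentence_choices` motivates tagging ids with their type, since a sentence and a question can share the same integer id. A tiny illustration of that bookkeeping:

```python
# Tiny illustration of the tuple-tagged ids described in the comment above.
negative_sentences = set()
negative_sentences.add(("sentence", 3))
negative_sentences.add(("question", 3))  # same integer id, but no collision
assert len(negative_sentences) == 2
```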
4 changes: 2 additions & 2 deletions allennlp/data/fields/label_field.py
@@ -37,8 +37,8 @@ class LabelField(Field):
"""
def __init__(self,
label: Union[str, int],
label_namespace: str='labels',
num_labels: int=None):
label_namespace: str = 'labels',
num_labels: int = None):
self._label = label
self._label_namespace = label_namespace
if num_labels is None:
2 changes: 1 addition & 1 deletion allennlp/data/fields/tag_field.py
@@ -35,7 +35,7 @@ class TagField(Field):
integers for you, and this parameter tells the ``Vocabulary`` object which mapping from
strings to integers to use (so that "O" as a tag doesn't get the same id as "O" as a word).
"""
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str='tags'):
def __init__(self, tags: List[str], sequence_field: SequenceField, tag_namespace: str = 'tags'):
self._tags = tags
self._sequence_field = sequence_field
self._tag_namespace = tag_namespace
2 changes: 1 addition & 1 deletion allennlp/data/token_indexers/single_id_token_indexer.py
@@ -20,7 +20,7 @@ class SingleIdTokenIndexer(TokenIndexer):
If ``True``, we will call ``token.lower()`` before getting an index for the token from the
vocabulary.
"""
def __init__(self, token_namespace: str='tokens', lowercase_tokens: bool=False):
def __init__(self, token_namespace: str = 'tokens', lowercase_tokens: bool = False):
self.token_namespace = token_namespace
self.lowercase_tokens = lowercase_tokens

4 changes: 2 additions & 2 deletions allennlp/data/token_indexers/token_characters_indexer.py
@@ -26,8 +26,8 @@ class TokenCharactersIndexer(TokenIndexer):
retains casing.
"""
def __init__(self,
character_namespace: str='token_characters',
character_tokenizer: CharacterTokenizer=CharacterTokenizer()):
character_namespace: str = 'token_characters',
character_tokenizer: CharacterTokenizer = CharacterTokenizer()):
self.character_namespace = character_namespace
self.character_tokenizer = character_tokenizer

2 changes: 1 addition & 1 deletion allennlp/data/tokenizers/character_tokenizer.py
@@ -25,7 +25,7 @@ class CharacterTokenizer(Tokenizer):
operation. You probably do not want to do this, as character vocabularies are generally
not very large to begin with, but it's an option if you really want it.
"""
def __init__(self, byte_encoding: str=None, lowercase_characters: bool=False):
def __init__(self, byte_encoding: str = None, lowercase_characters: bool = False):
self.byte_encoding = byte_encoding
self.lowercase_characters = lowercase_characters

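
A standalone sketch of the two constructor options shown above, plain character splitting versus byte encoding (not the real `CharacterTokenizer`):

```python
# Standalone sketch of the options above; not the real CharacterTokenizer.
from typing import List, Union

def tokenize_characters(text: str,
                        byte_encoding: str = None,
                        lowercase_characters: bool = False) -> List[Union[str, int]]:
    if lowercase_characters:
        text = text.lower()
    if byte_encoding is not None:
        # Each byte becomes a token, which caps the vocabulary at 256 entries.
        return list(text.encode(byte_encoding))
    return list(text)

tokenize_characters("AllenNLP")                      # ['A', 'l', 'l', 'e', 'n', 'N', 'L', 'P']
tokenize_characters("café", byte_encoding="utf-8")   # [99, 97, 102, 195, 169]
```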
6 changes: 3 additions & 3 deletions allennlp/data/tokenizers/word_tokenizer.py
@@ -30,9 +30,9 @@ class WordTokenizer(Tokenizer):
The :class:`WordStemmer` to use. Default is no stemming.
"""
def __init__(self,
word_splitter: WordSplitter=SimpleWordSplitter(),
word_filter: WordFilter=PassThroughWordFilter(),
word_stemmer: WordStemmer=PassThroughWordStemmer()):
word_splitter: WordSplitter = SimpleWordSplitter(),
word_filter: WordFilter = PassThroughWordFilter(),
word_stemmer: WordStemmer = PassThroughWordStemmer()):
self.word_splitter = word_splitter
self.word_filter = word_filter
self.word_stemmer = word_stemmer
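
`WordTokenizer` chains a splitter, a filter, and a stemmer, with pass-through defaults for the last two. A minimal standalone sketch of that pipeline (the real splitter/filter/stemmer classes are not shown in this diff):

```python
# Minimal standalone sketch of the split -> filter -> stem pipeline; the real
# WordSplitter/WordFilter/WordStemmer classes are not part of this diff.
from typing import Callable, List

def word_tokenize(text: str,
                  splitter: Callable[[str], List[str]] = str.split,
                  word_filter: Callable[[List[str]], List[str]] = lambda words: words,
                  stemmer: Callable[[str], str] = lambda word: word) -> List[str]:
    words = splitter(text)
    words = word_filter(words)
    return [stemmer(word) for word in words]

word_tokenize("this is a sentence .")   # ['this', 'is', 'a', 'sentence', '.']
```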
28 changes: 14 additions & 14 deletions allennlp/data/vocabulary.py
@@ -1,8 +1,8 @@
from collections import defaultdict
from typing import Any, Callable, Dict, List, Union

import codecs
import logging

import tqdm

from ..common.util import namespace_match
@@ -123,10 +123,10 @@ class Vocabulary:
label fields in this code), you don't have to specify anything here.
"""
def __init__(self,
counter: Dict[str, Dict[str, int]]=None,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None):
counter: Dict[str, Dict[str, int]] = None,
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None):
self._padding_token = "@@PADDING@@"
self._oov_token = "@@UNKOWN@@"
if non_padded_namespaces is None:
@@ -150,7 +150,7 @@ def __init__(self,
if count >= min_count:
self.add_token_to_namespace(token, namespace)

def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
def set_from_file(self, filename: str, oov_token: str, namespace: str = "tokens"):
"""
If you already have a vocabulary file for a trained model somewhere, and you really want to
use that vocabulary file instead of just setting the vocabulary from a dataset, for
@@ -182,9 +182,9 @@ def set_from_file(self, filename: str, oov_token: str, namespace: str="tokens"):
@classmethod
def from_dataset(cls,
dataset,
min_count: int=1,
max_vocab_size: Union[int, Dict[str, int]]=None,
non_padded_namespaces: List[str]=None) -> 'Vocabulary':
min_count: int = 1,
max_vocab_size: Union[int, Dict[str, int]] = None,
non_padded_namespaces: List[str] = None) -> 'Vocabulary':
"""
Constructs a vocabulary given a :class:`.Dataset` and some parameters. We count all of the
vocabulary items in the dataset, then pass those counts, and the other parameters, to
@@ -200,7 +200,7 @@ def from_dataset(cls,
max_vocab_size=max_vocab_size,
non_padded_namespaces=non_padded_namespaces)

def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
def add_token_to_namespace(self, token: str, namespace: str = 'tokens') -> int:
"""
Adds ``token`` to the index, if it is not already present. Either way, we return the index of
the token.
@@ -213,17 +213,17 @@ def add_token_to_namespace(self, token: str, namespace: str='tokens') -> int:
else:
return self._token_to_index[namespace][token]

def get_index_to_token_vocabulary(self, namespace: str='tokens') -> Dict[int, str]:
def get_index_to_token_vocabulary(self, namespace: str = 'tokens') -> Dict[int, str]:
return self._index_to_token[namespace]

def get_token_index(self, token: str, namespace: str='tokens') -> int:
def get_token_index(self, token: str, namespace: str = 'tokens') -> int:
if token in self._token_to_index[namespace]:
return self._token_to_index[namespace][token]
else:
return self._token_to_index[namespace][self._oov_token]

def get_token_from_index(self, index: int, namespace: str='tokens') -> str:
def get_token_from_index(self, index: int, namespace: str = 'tokens') -> str:
return self._index_to_token[namespace][index]

def get_vocab_size(self, namespace: str='tokens') -> int:
def get_vocab_size(self, namespace: str = 'tokens') -> int:
return len(self._token_to_index[namespace])
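
The hunks above show `get_token_index` falling back to the OOV token for unseen words. A standalone sketch of that namespaced mapping (not the real `Vocabulary` class):

```python
# Standalone sketch of the namespaced token <-> index mapping with OOV fallback
# shown above; this is not the real Vocabulary class.
from collections import defaultdict
from typing import Dict

OOV_TOKEN = "@@UNKOWN@@"  # spelled as in the diff above

class TinyVocabulary:
    def __init__(self):
        self._token_to_index: Dict[str, Dict[str, int]] = defaultdict(dict)
        self._index_to_token: Dict[str, Dict[int, str]] = defaultdict(dict)
        # The OOV token is always present so lookups can fall back to it.
        self.add_token_to_namespace(OOV_TOKEN)

    def add_token_to_namespace(self, token: str, namespace: str = "tokens") -> int:
        if token not in self._token_to_index[namespace]:
            index = len(self._token_to_index[namespace])
            self._token_to_index[namespace][token] = index
            self._index_to_token[namespace][index] = token
        return self._token_to_index[namespace][token]

    def get_token_index(self, token: str, namespace: str = "tokens") -> int:
        # Unknown tokens map to the OOV index, mirroring get_token_index above.
        return self._token_to_index[namespace].get(
            token, self._token_to_index[namespace][OOV_TOKEN])

vocab = TinyVocabulary()
vocab.add_token_to_namespace("the")
assert vocab.get_token_index("the") == 1
assert vocab.get_token_index("unseen-word") == vocab.get_token_index(OOV_TOKEN)
```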