This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit 56d5b50: fix pylint
Author: DeNeutoy, committed Jun 27, 2017
Parent commit: cad4fbb
Showing 6 changed files with 171 additions and 207 deletions.
2 changes: 1 addition & 1 deletion allennlp/data/data_generator.py
@@ -177,7 +177,7 @@ def __adaptive_grouping(self, instances: List[Instance]):

    @staticmethod
    def sort_dataset_by_padding(dataset: Dataset,
-                               sorting_keys: List[Tuple[str, str]],
+                               sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
                                padding_noise: float=0.0) -> List[Instance]:
        """
        Sorts the ``Instances`` in this ``Dataset`` by their padding lengths, using the keys in
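A note on the suppression above: pylint releases of this era raised a false positive, invalid-sequence-index, on subscripted typing generics, misreading Tuple[str, str] as an attempt to index a sequence with a non-integer. The disable comment silences the checker on that one line; the annotation itself is valid. A minimal, self-contained sketch of the pattern (the function below is illustrative, not from this repository):

    from typing import List, Tuple

    def sort_by_keys(keys: List[Tuple[str, str]]) -> None:  # pylint: disable=invalid-sequence-index
        # Older pylint flagged the Tuple[str, str] subscript here; newer
        # releases understand typing generics and need no suppression.
        for namespace, key in keys:
            print(namespace, key)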
4 changes: 3 additions & 1 deletion allennlp/data/dataset_readers/squad.py
@@ -73,7 +73,9 @@ def __init__(self,
        # Maps question indices to question strings
        self._id_to_question = {}

-    def _get_sentence_choices(self, question_id: int, answer_id: int) -> Tuple[List[str], int]:
+    def _get_sentence_choices(self,
+                              question_id: int,
+                              answer_id: int) -> Tuple[List[str], int]:  # pylint: disable=invalid-sequence-index
        # Because sentences and questions have different indices, we need this to hold tuples of
        # ("sentence", id) or ("question", id), instead of just single ids.
        negative_sentences = set()
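The squad.py hunk applies the same invalid-sequence-index suppression; the signature is also reflowed onto three lines, presumably so the disable comment can sit on the line carrying the offending Tuple[List[str], int] annotation without breaching the line-length limit.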
5 changes: 3 additions & 2 deletions allennlp/data/vocabulary.py
@@ -1,13 +1,14 @@
from collections import defaultdict
-from ..common.util import namespace_match
from typing import Dict, List, Union
import codecs
import logging

import tqdm

+from ..common.util import namespace_match
+
-logger = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


class _NamespaceDependentDefaultDict(defaultdict):
    """
    Sometimes certain namespaces need padding (like "tokens") and some don't (like
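The vocabulary.py hunk addresses import layout rather than typing: pylint's import-order checks expect standard-library imports first, then third-party packages such as tqdm, then first-party (here, relative) imports, so the namespace_match import moves below tqdm. The logger line needs its own suppression because pylint expects module-level names to be UPPER_CASE constants. A minimal sketch of the accepted layout, trimmed to the relevant imports:

    import logging  # standard library first

    import tqdm  # third-party second

    from ..common.util import namespace_match  # first-party / relative last

    # Module-level names are assumed to be constants (UPPER_CASE), so a
    # lowercase logger carries an inline suppression:
    logger = logging.getLogger(__name__)  # pylint: disable=invalid-name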
4 changes: 2 additions & 2 deletions allennlp/layers/embeddings.py
@@ -74,13 +74,13 @@ def __init__(self,
        if self.padding_index is not None:
            self.weight.data[self.padding_index].fill_(0)

-    def forward(self, input):
+    def forward(self, inputs):  # pylint: disable=arguments-differ
        padding_index = self.padding_index if self.padding_index is not None else -1
        return self._backend.Embedding(padding_index,
                                       self.max_norm,
                                       self.norm_type,
                                       self.scale_grad_by_freq,
-                                      self.sparse)(input, self.weight)
+                                      self.sparse)(inputs, self.weight)


def get_pretrained_embedding_layer(embeddings_filename: str,
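In embeddings.py the parameter is renamed from input to inputs so it no longer shadows the input builtin, but the override then diverges from the forward(self, *input) signature that torch.nn.Module declared in PyTorch of this era, which pylint reports as arguments-differ. A standalone sketch of the interaction (the Module class below is an assumed stand-in for torch.nn.Module):

    class Module:
        def forward(self, *input):  # pylint: disable=redefined-builtin
            raise NotImplementedError

    class Embedding(Module):
        # Renaming the argument avoids shadowing the builtin, but the override
        # no longer matches the base class signature, hence the suppression:
        def forward(self, inputs):  # pylint: disable=arguments-differ
            return inputs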
322 changes: 161 additions & 161 deletions tests/data/data_generator_to_fix.py
@@ -1,161 +1,161 @@
-# pylint: disable=no-self-use,invalid-name
-import numpy
-
-from allennlp.common.params import Params
-from allennlp.data.data_generator import DataGenerator
-from allennlp.testing.test_case import AllenNlpTestCase
-
-
-class TestDataGenerator(AllenNlpTestCase):
-    def setUp(self):
-        super(TestDataGenerator, self).setUp()
-        self.text_trainer = FakeTextTrainer()
-        self.instances = [
-            FakeInstance(0, 5, 3, 2),
-            FakeInstance(1, 4, 3, 2),
-            FakeInstance(2, 4, 1, 2),
-            FakeInstance(3, 9, 3, 2),
-            FakeInstance(4, 8, 3, 2),
-            FakeInstance(5, 2, 1, 2),
-            FakeInstance(6, 3, 3, 2),
-            FakeInstance(7, 3, 3, 3),
-            FakeInstance(8, 1, 1, 2),
-            FakeInstance(9, 1, 1, 3),
-        ]
-
-    def test_instances_are_sorted_by_sorting_keys(self):
-        params = Params({
-            'dynamic_padding': True,
-            'padding_noise': 0.0,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        assert generator.last_num_batches == 4
-        one_epoch_arrays = [next(batches) for _ in range(4)]
-        one_epoch_arrays.sort(key=lambda x: x[0][0])
-        assert self.as_list(one_epoch_arrays[0][0]) == [1, 0, 4]
-        assert self.as_list(one_epoch_arrays[1][0]) == [3]
-        assert self.as_list(one_epoch_arrays[2][0]) == [6, 7, 2]
-        assert self.as_list(one_epoch_arrays[3][0]) == [8, 9, 5]
-
-    def test_batches_are_consistent_with_no_repermuting(self):
-        params = Params({
-            'padding_noise': 0.0,
-            'sort_every_epoch': False,
-            'dynamic_padding': True,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        assert generator.last_num_batches == 4
-        first_epoch_arrays = [next(batches) for _ in range(4)]
-        second_epoch_arrays = [next(batches) for _ in range(4)]
-        first_epoch_arrays.sort(key=lambda x: x[0][0])
-        second_epoch_arrays.sort(key=lambda x: x[0][0])
-        first_epoch = [self.as_list(x[0]) for x in first_epoch_arrays]
-        second_epoch = [self.as_list(x[0]) for x in second_epoch_arrays]
-        assert first_epoch == second_epoch
-
-    def test_biggest_batch_first(self):
-        params = Params({
-            'padding_noise': 0.0,
-            'dynamic_padding': True,
-            'biggest_batch_first': True,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        biggest_batches = [next(batches) for _ in range(2)]
-        assert self.as_list(biggest_batches[0][0]) == [3]
-        assert self.as_list(biggest_batches[1][0]) == [1, 0, 4]
-
-    def test_adaptive_grouping(self):
-        params = Params({
-            'padding_noise': 0.0,
-            'dynamic_padding': True,
-            'adaptive_batch_sizes': True,
-            'adaptive_memory_usage_constant': 130,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        assert generator.last_num_batches == 4
-        one_epoch_arrays = [next(batches) for _ in range(4)]
-        one_epoch_arrays.sort(key=lambda x: x[0][0])
-        assert self.as_list(one_epoch_arrays[0][0]) == [0, 4]
-        assert self.as_list(one_epoch_arrays[1][0]) == [3]
-        assert self.as_list(one_epoch_arrays[2][0]) == [7, 2, 1]
-        assert self.as_list(one_epoch_arrays[3][0]) == [8, 9, 5, 6]
-
-    def test_sort_every_batch_actually_adds_noise_every_batch(self):
-        # We're just going to get two epoch's worth of batches, and make sure that they're
-        # different.
-        params = Params({
-            'padding_noise': 0.8,
-            'sort_every_epoch': True,
-            'dynamic_padding': True,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        assert generator.last_num_batches == 4
-        first_epoch_arrays = [next(batches) for _ in range(4)]
-        second_epoch_arrays = [next(batches) for _ in range(4)]
-        first_epoch_arrays.sort(key=lambda x: x[0][0])
-        second_epoch_arrays.sort(key=lambda x: x[0][0])
-        first_epoch = [self.as_list(x[0]) for x in first_epoch_arrays]
-        second_epoch = [self.as_list(x[0]) for x in second_epoch_arrays]
-        assert first_epoch != second_epoch
-
-    def test_maximum_batch_size_is_actually_a_maximum(self):
-        params = Params({
-            'padding_noise': 0.0,
-            'dynamic_padding': True,
-            'adaptive_batch_sizes': True,
-            'adaptive_memory_usage_constant': 50,
-            'maximum_batch_size': 2,
-        })
-        generator = DataGenerator(self.text_trainer, params)
-        batches = generator.create_generator(IndexedDataset(self.instances))
-        assert generator.last_num_batches == 7
-        one_epoch_arrays = [next(batches) for _ in range(7)]
-        one_epoch_arrays.sort(key=lambda x: x[0][0])
-        print([self.as_list(x[0]) for x in one_epoch_arrays])
-        assert self.as_list(one_epoch_arrays[0][0]) == [0]
-        assert self.as_list(one_epoch_arrays[1][0]) == [2, 1]
-        assert self.as_list(one_epoch_arrays[2][0]) == [3]
-        assert self.as_list(one_epoch_arrays[3][0]) == [4]
-        assert self.as_list(one_epoch_arrays[4][0]) == [5, 6]
-        assert self.as_list(one_epoch_arrays[5][0]) == [7]
-        assert self.as_list(one_epoch_arrays[6][0]) == [8, 9]
-
-    def as_list(self, array):
-        return list(numpy.squeeze(array, axis=-1))
-
-
-class FakeInstance:
-    def __init__(self, index, a_length, b_length, c_length):
-        self.index = index
-        self.a_length = a_length
-        self.b_length = b_length
-        self.c_length = c_length
-
-    def get_padding_lengths(self):
-        return {'a': self.a_length, 'b': self.b_length, 'c': self.c_length}
-
-    def pad(self, lengths):
-        pass
-
-    def as_training_data(self):
-        return numpy.asarray([self.index]), numpy.asarray([self.index])
-
-
-class FakeTextTrainer:
-    batch_size = 3
-    a_length = None
-    b_length = None
-    c_length = None
-    def get_instance_sorting_keys(self):
-        return ['a', 'b', 'c']
-
-    def get_padding_lengths(self):
-        return {'a': self.a_length, 'b': self.b_length, 'c': self.c_length}
-
-    def get_padding_memory_scaling(self, lengths):
-        return lengths['a'] * lengths['b'] * lengths['c']
+# # pylint: disable=no-self-use,invalid-name
+# import numpy
+#
+# from allennlp.common.params import Params
+# from allennlp.data.data_generator import DataGenerator
+# from allennlp.testing.test_case import AllenNlpTestCase
+#
+#
+# class TestDataGenerator(AllenNlpTestCase):
+#     def setUp(self):
+#         super(TestDataGenerator, self).setUp()
+#         self.text_trainer = FakeTextTrainer()
+#         self.instances = [
+#             FakeInstance(0, 5, 3, 2),
+#             FakeInstance(1, 4, 3, 2),
+#             FakeInstance(2, 4, 1, 2),
+#             FakeInstance(3, 9, 3, 2),
+#             FakeInstance(4, 8, 3, 2),
+#             FakeInstance(5, 2, 1, 2),
+#             FakeInstance(6, 3, 3, 2),
+#             FakeInstance(7, 3, 3, 3),
+#             FakeInstance(8, 1, 1, 2),
+#             FakeInstance(9, 1, 1, 3),
+#         ]
+#
+#     def test_instances_are_sorted_by_sorting_keys(self):
+#         params = Params({
+#             'dynamic_padding': True,
+#             'padding_noise': 0.0,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         assert generator.last_num_batches == 4
+#         one_epoch_arrays = [next(batches) for _ in range(4)]
+#         one_epoch_arrays.sort(key=lambda x: x[0][0])
+#         assert self.as_list(one_epoch_arrays[0][0]) == [1, 0, 4]
+#         assert self.as_list(one_epoch_arrays[1][0]) == [3]
+#         assert self.as_list(one_epoch_arrays[2][0]) == [6, 7, 2]
+#         assert self.as_list(one_epoch_arrays[3][0]) == [8, 9, 5]
+#
+#     def test_batches_are_consistent_with_no_repermuting(self):
+#         params = Params({
+#             'padding_noise': 0.0,
+#             'sort_every_epoch': False,
+#             'dynamic_padding': True,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         assert generator.last_num_batches == 4
+#         first_epoch_arrays = [next(batches) for _ in range(4)]
+#         second_epoch_arrays = [next(batches) for _ in range(4)]
+#         first_epoch_arrays.sort(key=lambda x: x[0][0])
+#         second_epoch_arrays.sort(key=lambda x: x[0][0])
+#         first_epoch = [self.as_list(x[0]) for x in first_epoch_arrays]
+#         second_epoch = [self.as_list(x[0]) for x in second_epoch_arrays]
+#         assert first_epoch == second_epoch
+#
+#     def test_biggest_batch_first(self):
+#         params = Params({
+#             'padding_noise': 0.0,
+#             'dynamic_padding': True,
+#             'biggest_batch_first': True,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         biggest_batches = [next(batches) for _ in range(2)]
+#         assert self.as_list(biggest_batches[0][0]) == [3]
+#         assert self.as_list(biggest_batches[1][0]) == [1, 0, 4]
+#
+#     def test_adaptive_grouping(self):
+#         params = Params({
+#             'padding_noise': 0.0,
+#             'dynamic_padding': True,
+#             'adaptive_batch_sizes': True,
+#             'adaptive_memory_usage_constant': 130,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         assert generator.last_num_batches == 4
+#         one_epoch_arrays = [next(batches) for _ in range(4)]
+#         one_epoch_arrays.sort(key=lambda x: x[0][0])
+#         assert self.as_list(one_epoch_arrays[0][0]) == [0, 4]
+#         assert self.as_list(one_epoch_arrays[1][0]) == [3]
+#         assert self.as_list(one_epoch_arrays[2][0]) == [7, 2, 1]
+#         assert self.as_list(one_epoch_arrays[3][0]) == [8, 9, 5, 6]
+#
+#     def test_sort_every_batch_actually_adds_noise_every_batch(self):
+#         # We're just going to get two epoch's worth of batches, and make sure that they're
+#         # different.
+#         params = Params({
+#             'padding_noise': 0.8,
+#             'sort_every_epoch': True,
+#             'dynamic_padding': True,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         assert generator.last_num_batches == 4
+#         first_epoch_arrays = [next(batches) for _ in range(4)]
+#         second_epoch_arrays = [next(batches) for _ in range(4)]
+#         first_epoch_arrays.sort(key=lambda x: x[0][0])
+#         second_epoch_arrays.sort(key=lambda x: x[0][0])
+#         first_epoch = [self.as_list(x[0]) for x in first_epoch_arrays]
+#         second_epoch = [self.as_list(x[0]) for x in second_epoch_arrays]
+#         assert first_epoch != second_epoch
+#
+#     def test_maximum_batch_size_is_actually_a_maximum(self):
+#         params = Params({
+#             'padding_noise': 0.0,
+#             'dynamic_padding': True,
+#             'adaptive_batch_sizes': True,
+#             'adaptive_memory_usage_constant': 50,
+#             'maximum_batch_size': 2,
+#         })
+#         generator = DataGenerator(self.text_trainer, params)
+#         batches = generator.create_generator(IndexedDataset(self.instances))
+#         assert generator.last_num_batches == 7
+#         one_epoch_arrays = [next(batches) for _ in range(7)]
+#         one_epoch_arrays.sort(key=lambda x: x[0][0])
+#         print([self.as_list(x[0]) for x in one_epoch_arrays])
+#         assert self.as_list(one_epoch_arrays[0][0]) == [0]
+#         assert self.as_list(one_epoch_arrays[1][0]) == [2, 1]
+#         assert self.as_list(one_epoch_arrays[2][0]) == [3]
+#         assert self.as_list(one_epoch_arrays[3][0]) == [4]
+#         assert self.as_list(one_epoch_arrays[4][0]) == [5, 6]
+#         assert self.as_list(one_epoch_arrays[5][0]) == [7]
+#         assert self.as_list(one_epoch_arrays[6][0]) == [8, 9]
+#
+#     def as_list(self, array):
+#         return list(numpy.squeeze(array, axis=-1))
+#
+#
+# class FakeInstance:
+#     def __init__(self, index, a_length, b_length, c_length):
+#         self.index = index
+#         self.a_length = a_length
+#         self.b_length = b_length
+#         self.c_length = c_length
+#
+#     def get_padding_lengths(self):
+#         return {'a': self.a_length, 'b': self.b_length, 'c': self.c_length}
+#
+#     def pad(self, lengths):
+#         pass
+#
+#     def as_training_data(self):
+#         return numpy.asarray([self.index]), numpy.asarray([self.index])
+#
+#
+# class FakeTextTrainer:
+#     batch_size = 3
+#     a_length = None
+#     b_length = None
+#     c_length = None
+#     def get_instance_sorting_keys(self):
+#         return ['a', 'b', 'c']
+#
+#     def get_padding_lengths(self):
+#         return {'a': self.a_length, 'b': self.b_length, 'c': self.c_length}
+#
+#     def get_padding_memory_scaling(self, lengths):
+#         return lengths['a'] * lengths['b'] * lengths['c']
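The test module is commented out wholesale rather than fixed: the original code constructs IndexedDataset, which is never imported or defined anywhere in the file, so pylint's undefined-variable check fails throughout, and the _to_fix suffix on the filename suggests the tests are parked until they are ported. A minimal sketch of the error class being silenced:

    def make_batches(instances):
        # IndexedDataset is never imported or defined, so pylint reports
        # undefined-variable (E0602) on the next line; commenting the whole
        # module out removes the report.
        return IndexedDataset(instances)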
