forked from Lightning-AI/pytorch-lightning
Commit 8dfb8f9 · 1 parent 4a01c2b
Showing 30 changed files with 2,206 additions and 0 deletions.
@@ -0,0 +1,36 @@

# Pytorch-lightning
Seed for ML research

## Usage

### Add new model
1. Create a new model under /models.
2. Add the model name to trainer_main:
```python
AVAILABLE_MODELS = {
    'model_1': ExampleModel1
}
```
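
For context, a minimal sketch of how trainer_main could resolve the registered name into a model instance. The `--model_name` flag, the `models.example_model_1` module path, and the `get_model` helper are assumptions for illustration, not the repo's actual code:

```python
# Hypothetical lookup in trainer_main: map a CLI-selected name to a model class.
from models.example_model_1 import ExampleModel1  # assumed module path

AVAILABLE_MODELS = {
    'model_1': ExampleModel1,
}

def get_model(hparams):
    # hparams.model_name is an assumed flag; adjust to the real argument name
    model_class = AVAILABLE_MODELS[hparams.model_name]
    return model_class(hparams)
```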

### Model methods that can be implemented

| Method | Purpose | Input | Output | Required |
|---|---|---|---|---|
| forward() | Forward pass | model_in tuple with your data | model_out tuple to be passed to loss | Y |
| loss() | Calculate the model loss | model_out tuple from forward() | A scalar | Y |
| check_performance() | Run a full loop through the val data to compute metrics | dataloader, nb_tests | metrics tuple to be tracked | Y |
| tng_dataloader | Computed option, used to feed tng data | - | Pytorch DataLoader subclass | Y |
| val_dataloader | Computed option, used to feed val data | - | Pytorch DataLoader subclass | Y |
| test_dataloader | Computed option, used to feed test data | - | Pytorch DataLoader subclass | Y |
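
A minimal skeleton of how a new model under /models might implement these methods. The class name, layer sizes, placeholder dataset, and the `in_features`/`nb_classes` hyperparameters are assumptions, not part of this repo:

```python
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

class ExampleModel1(nn.Module):
    def __init__(self, hparams):
        super(ExampleModel1, self).__init__()
        self.hparams = hparams
        self.net = nn.Linear(hparams.in_features, hparams.nb_classes)

    def forward(self, model_in):
        # unpack the model_in tuple and return a model_out tuple for loss()
        x, y = model_in
        return self.net(x), y

    def loss(self, model_out):
        # return a scalar loss from the model_out tuple
        logits, y = model_out
        return nn.functional.cross_entropy(logits, y)

    def check_performance(self, dataloader, nb_tests):
        # run a loop over the val data and return a metrics tuple
        correct, total = 0, 0
        for i, batch in enumerate(dataloader):
            if i >= nb_tests:
                break
            logits, y = self.forward(batch)
            correct += (logits.argmax(dim=1) == y).sum().item()
            total += y.size(0)
        return (correct / max(total, 1),)

    @property
    def tng_dataloader(self):
        # placeholder random dataset; replace with real training data
        x = torch.randn(64, self.hparams.in_features)
        y = torch.randint(0, self.hparams.nb_classes, (64,))
        return DataLoader(TensorDataset(x, y), batch_size=16)

    @property
    def val_dataloader(self):
        return self.tng_dataloader

    @property
    def test_dataloader(self):
        return self.tng_dataloader
```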

### Model lifecycle hooks
Use these hooks to customize functionality

| Method | Purpose | Input | Output | Required |
|---|---|---|---|---|
| on_batch_start() | called right before the batch starts | - | - | N |
| on_batch_end() | called right after the batch ends | - | - | N |
| on_epoch_start() | called right before the epoch starts | - | - | N |
| on_epoch_end() | called right after the epoch ends | - | - | N |
| on_pre_performance_check() | called right before the performance check starts | - | - | N |
| on_post_performance_check() | called right after the performance check ends | - | - | N |
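
As an illustration, hooks are plain no-argument methods a model can override; this brief example extends the skeleton above and is an assumption (the repo does not prescribe timing an epoch):

```python
import time

class ExampleModel1WithHooks(ExampleModel1):
    def on_epoch_start(self):
        # called right before the epoch starts
        self._epoch_t0 = time.time()

    def on_epoch_end(self):
        # called right after the epoch ends
        print('epoch took {:.1f}s'.format(time.time() - self._epoch_t0))
```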
Empty file.
Empty file.
@@ -0,0 +1,27 @@

atomicwrites==1.2.1
attrs==18.2.0
certifi==2018.11.29
cffi==1.11.5
h5py==2.9.0
imageio==2.4.1
mkl-fft==1.0.6
mkl-random==1.0.2
more-itertools==5.0.0
numpy==1.15.4
olefile==0.46
pandas==0.23.4
Pillow==5.3.0
pluggy==0.8.0
py==1.7.0
pycparser==2.19
pytest==4.0.2
python-dateutil==2.7.5
pytz==2018.7
scikit-learn==0.20.2
scipy==1.2.0
six==1.12.0
sklearn==0.0
test-tube==0.6282
torch==1.0.0
torchvision==0.2.1
tqdm==4.28.1
Empty file.
Empty file.
Empty file.
@@ -0,0 +1,167 @@

import torch.nn as nn
import numpy as np

from test_tube import HyperOptArgumentParser
import torch
from torch.autograd import Variable
from sklearn.metrics import confusion_matrix, f1_score
from torch.nn import functional as F

class BiLSTMPack(nn.Module):
    """
    Sample model to show how to define a template
    """
    def __init__(self, hparams):
        # init superclass (nn.Module.__init__ takes no arguments) and keep hparams around
        super(BiLSTMPack, self).__init__()
        self.hparams = hparams

        self.hidden = None

        # trigger tag building
        self.ner_tagset = {'O': 0, 'I-Bio': 1}
        self.nb_tags = len(self.ner_tagset)

        # build model
        print('building model...')
        if hparams.model_load_weights_path is None:
            self.__build_model()
            print('model built')
        else:
            # NOTE: `load` is expected to be provided elsewhere (e.g. by the trainer);
            # reassigning `self` does not replace the instance being constructed
            self = BiLSTMPack.load(hparams.model_load_weights_path, hparams.on_gpu, hparams)
            print('model loaded from: {}'.format(hparams.model_load_weights_path))

    def __build_model(self):
        """
        Layout model
        :return:
        """
        # design the number of final units
        self.output_dim = self.hparams.nb_lstm_units

        # when it's bidirectional our weights double
        if self.hparams.bidirectional:
            self.output_dim *= 2

        # total number of words
        total_words = len(self.tng_dataloader.dataset.words_token_to_idx)

        # word embeddings
        self.word_embedding = nn.Embedding(
            num_embeddings=total_words + 1,
            embedding_dim=self.hparams.embedding_dim,
            padding_idx=0
        )

        # design the LSTM
        self.lstm = nn.LSTM(
            self.hparams.embedding_dim,
            self.hparams.nb_lstm_units,
            num_layers=self.hparams.nb_lstm_layers,
            bidirectional=self.hparams.bidirectional,
            dropout=self.hparams.drop_prob,
            batch_first=True,
        )

        # map to tag space (fc_out is defined but not used in forward)
        self.fc_out = nn.Linear(self.output_dim, self.output_dim)
        self.hidden_to_tag = nn.Linear(self.output_dim, self.nb_tags)

    def init_hidden(self, batch_size):
        # the weights are of the form (nb_layers * 2 if bidirectional, batch_size, nb_lstm_units)
        mult = 2 if self.hparams.bidirectional else 1
        hidden_a = torch.randn(self.hparams.nb_lstm_layers * mult, batch_size, self.hparams.nb_lstm_units)
        hidden_b = torch.randn(self.hparams.nb_lstm_layers * mult, batch_size, self.hparams.nb_lstm_units)

        if self.hparams.on_gpu:
            hidden_a = hidden_a.cuda()
            hidden_b = hidden_b.cuda()

        hidden_a = Variable(hidden_a)
        hidden_b = Variable(hidden_b)

        return (hidden_a, hidden_b)

    def forward(self, model_in):
        # layout data (expand it, etc...)
        # x = sequences
        x, seq_lengths = model_in
        batch_size, seq_len = x.size()

        # reset RNN hidden state
        self.hidden = self.init_hidden(batch_size)

        # embed
        x = self.word_embedding(x)

        # run through rnn using packed sequences
        x = torch.nn.utils.rnn.pack_padded_sequence(x, seq_lengths, batch_first=True)
        x, self.hidden = self.lstm(x, self.hidden)
        x, _ = torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True)

        # if asked for only the last state, use h_n which is the same as out(t=n)
        if not self.hparams.return_sequence:
            # pull out hidden states
            # h_n = (nb_directions * nb_layers, batch_size, emb_size)
            nb_directions = 2 if self.hparams.bidirectional else 1
            (h_n, _) = self.hidden

            # reshape to make indexing easier
            # forward = 0, backward = 1 (of nb_directions)
            h_n = h_n.view(self.hparams.nb_lstm_layers, nb_directions, batch_size, self.hparams.nb_lstm_units)

            # pull out last forward
            forward_h_n = h_n[-1, 0, :, :]
            x = forward_h_n

            # if bidirectional, also pull out the last hidden of the backward network
            if self.hparams.bidirectional:
                backward_h_n = h_n[-1, 1, :, :]
                x = torch.cat([forward_h_n, backward_h_n], dim=1)

        # project to tag space
        x = x.contiguous()
        x = x.view(-1, self.output_dim)
        x = self.hidden_to_tag(x)

        return x

    def loss(self, model_out):
        # cross entropy loss
        logits, y = model_out
        y, y_lens = y

        # flatten y and logits
        y = y.view(-1)
        logits = logits.view(-1, self.nb_tags)

        # calculate a mask to remove padding tokens
        mask = (y >= 0).float()

        # count how many tokens we have
        num_tokens = int(torch.sum(mask).item())

        # pick the correct values and mask out
        logits = logits[range(logits.shape[0]), y] * mask

        # compute the ce loss
        ce_loss = -torch.sum(logits) / num_tokens

        return ce_loss

    def pull_out_last_embedding(self, x, seq_lengths, batch_size, on_gpu):
        # grab only the last activations from the non-padded output
        x_last = torch.zeros([batch_size, 1, x.size(-1)])
        for i, seq_len in enumerate(seq_lengths):
            x_last[i, :, :] = x[i, seq_len - 1, :]

        # put on gpu when requested
        if on_gpu:
            x_last = x_last.cuda()

        # turn into torch var
        x_last = Variable(x_last)

        return x_last
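
For orientation, a rough sketch of how this template might be instantiated with test_tube hyperparameters. Every flag name and default below is inferred from the attributes the class reads (nb_lstm_units, bidirectional, etc.), not copied from the repo's trainer:

```python
from test_tube import HyperOptArgumentParser

# assumed flags, matching the hparams attributes BiLSTMPack reads
parser = HyperOptArgumentParser(strategy='grid_search')
parser.add_argument('--embedding_dim', default=128, type=int)
parser.add_argument('--nb_lstm_units', default=256, type=int)
parser.add_argument('--nb_lstm_layers', default=1, type=int)
parser.add_argument('--bidirectional', default=True, type=bool)
parser.add_argument('--return_sequence', default=True, type=bool)
parser.add_argument('--drop_prob', default=0.2, type=float)
parser.add_argument('--on_gpu', default=False, type=bool)
parser.add_argument('--model_load_weights_path', default=None, type=str)
hparams = parser.parse_args()

# note: __build_model also expects a tng_dataloader to be attached to the model
model = BiLSTMPack(hparams)
```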
Empty file.