Merge branch 'main' into feat/sophia
ItsNiklas committed Jun 25, 2023
2 parents 56e24dd + b142ce1 commit ea20e92
Showing 4 changed files with 85 additions and 31 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
.idea
__pycache__
sst-classifier.pt
sst-classifier.pt
logdir
48 changes: 30 additions & 18 deletions classifier.py
@@ -1,4 +1,5 @@
import time, random, numpy as np, argparse, sys, re, os
from datetime import datetime
from types import SimpleNamespace
import csv

@@ -7,6 +8,7 @@
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, f1_score, recall_score, accuracy_score
from torch.utils.tensorboard import SummaryWriter

# change it with respect to the original model
from tokenizer import BertTokenizer
@@ -48,10 +50,8 @@ def __init__(self, config):
elif config.option == "finetune":
param.requires_grad = True

### TODO
self.dropout = nn.Dropout(config.hidden_dropout_prob)
# self.dropout = nn.Dropout(config.hidden_dropout_prob)
self.linear_layer = nn.Linear(config.hidden_size, self.num_labels)
# raise NotImplementedError

def forward(self, input_ids, attention_mask):
"""Takes a batch of sentences and returns logits for sentiment classes"""
@@ -60,7 +60,7 @@ def forward(self, input_ids, attention_mask):
# the training loop currently uses F.cross_entropy as the loss function.
# Cross entropy already has a softmax therefore this should be okay
result = self.bert(input_ids, attention_mask)
return self.linear_layer(self.dropout(result["pooler_output"]))
return self.linear_layer(result['pooler_output'])


class SentimentDataset(Dataset):
@@ -241,7 +241,11 @@ def save_model(model, optimizer, args, config, filepath):


def train(args):
device = torch.device("cuda") if args.use_gpu else torch.device("cpu")
name = datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir=args.logdir + "/classifier/" + name)
loss_idx_value = 0

device = torch.device('cuda') if args.use_gpu else torch.device('cpu')
# Load data
# Create the data and its corresponding datasets and dataloader
train_data, num_labels = load_data(args.train, "train")
@@ -319,21 +323,29 @@ def train(args):
optimizer.zero_grad(set_to_none=True)

train_loss += loss.item()
writer.add_scalar("Loss/Minibatches", loss.item(), loss_idx_value)
loss_idx_value += 1
num_batches += 1
iter_num += 1

train_loss = train_loss / (num_batches)
writer.add_scalar("Loss/Epochs", train_loss, epoch)

train_acc, train_f1, *_ = model_eval(train_dataloader, model, device)
writer.add_scalar("Accuracy/train/Epochs", train_acc, epoch)
writer.add_scalar("F1_score/train/Epochs", train_f1, epoch)

dev_acc, dev_f1, *_ = model_eval(dev_dataloader, model, device)
writer.add_scalar("Accuracy/dev/Epochs", dev_acc, epoch)
writer.add_scalar("F1_score/dev/Epochs", dev_f1, epoch)

if dev_acc > best_dev_acc:
best_dev_acc = dev_acc
save_model(model, optimizer, args, config, args.filepath)

print(
f"Epoch {epoch}: train loss :: {train_loss :.3f}, train acc :: {train_acc :.3f}, dev acc :: {dev_acc :.3f}"
)
f"Epoch {epoch}: train loss :: {train_loss :.3f}, train acc :: {train_acc :.3f}, dev acc :: {dev_acc :.3f}")
writer.close()


def test(args):
@@ -397,17 +409,16 @@ def get_args():
parser.add_argument("--dev_out", type=str, default="cfimdb-dev-output.txt")
parser.add_argument("--test_out", type=str, default="cfimdb-test-output.txt")

parser.add_argument(
"--batch_size", help="sst: 64, cfimdb: 8 can fit a 12GB GPU", type=int, default=8
)
parser.add_argument("--logdir", type=str, default="logdir")

parser.add_argument("--batch_size", help='sst: 64, cfimdb: 8 can fit a 12GB GPU', type=int, default=8)
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
parser.add_argument(
"--lr",
type=float,
help="learning rate (AdamW), default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
default=1e-5,
)

args, _ = parser.parse_known_args()

parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
default=1e-5 if args.option == 'finetune' else 1e-3)

args = parser.parse_args()
return args
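The hunk above replaces the fixed --lr default with a two-stage parse: parse_known_args() reads the flags registered so far (including --option), and --lr is then registered with a default that depends on the chosen option. A minimal standalone sketch of that pattern (the --option choices and surrounding script are illustrative assumptions, not the repository's exact get_args()):

    import argparse

    parser = argparse.ArgumentParser()
    # Assumed flag: the repository's get_args() defines --option elsewhere.
    parser.add_argument("--option", type=str, choices=("pretrain", "finetune"), default="pretrain")

    # First pass: read the flags registered so far, ignore anything unknown.
    known, _ = parser.parse_known_args()

    # Second pass: --lr gets a default that depends on the chosen option.
    parser.add_argument("--lr", type=float,
                        help="learning rate, default depends on --option",
                        default=1e-5 if known.option == "finetune" else 1e-3)

    args = parser.parse_args()
    print(args.option, args.lr)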

@@ -429,8 +440,9 @@ def get_args():
dev="data/ids-sst-dev.csv",
test="data/ids-sst-test-student.csv",
option=args.option,
dev_out="predictions/" + args.option + "-sst-dev-out.csv",
test_out="predictions/" + args.option + "-sst-test-out.csv",
dev_out='predictions/' + args.option + '-sst-dev-out.csv',
test_out='predictions/' + args.option + '-sst-test-out.csv',
logdir=args.logdir
)

train(config)
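For orientation, a minimal standalone sketch of the TensorBoard logging this commit threads through classifier.py: a timestamped SummaryWriter under the --logdir directory, per-minibatch scalars keyed by a running index, and a per-epoch aggregate. The loop and loss values below are placeholders rather than the repository's training code; once real runs have written logs, they can be viewed with tensorboard --logdir logdir.

    from datetime import datetime
    from torch.utils.tensorboard import SummaryWriter

    name = datetime.now().strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(log_dir="logdir/classifier/" + name)

    loss_idx_value = 0
    for epoch in range(2):
        epoch_loss, num_batches = 0.0, 0
        for step in range(5):
            loss = 1.0 / (loss_idx_value + 1)  # placeholder for loss.item()
            writer.add_scalar("Loss/Minibatches", loss, loss_idx_value)
            loss_idx_value += 1
            epoch_loss += loss
            num_batches += 1
        writer.add_scalar("Loss/Epochs", epoch_loss / num_batches, epoch)

    writer.close()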
64 changes: 52 additions & 12 deletions multitask_classifier.py
@@ -1,10 +1,12 @@
import time, random, numpy as np, argparse, sys, re, os
from datetime import datetime
from types import SimpleNamespace

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from bert import BertModel
from optimizer import AdamW
@@ -53,43 +55,60 @@ def __init__(self, config):
elif config.option == 'finetune':
param.requires_grad = True

self.linear_layer = nn.Linear(config.hidden_size, config.num_labels)

self.paraphrase_linear = nn.Linear(config.hidden_size, 1)
self.similarity_linear = nn.Linear(config.hidden_size, 1)

def forward(self, input_ids, attention_mask):
'Takes a batch of sentences and produces embeddings for them.'
# The final BERT embedding is the hidden state of [CLS] token (the first token)
# Here, you can start by just returning the embeddings straight from BERT.
# When thinking of improvements, you can later try modifying this
# (e.g., by adding other layers).
### TODO
raise NotImplementedError

result = self.bert(input_ids, attention_mask)
return result['pooler_output']

def predict_sentiment(self, input_ids, attention_mask):
'''Given a batch of sentences, outputs logits for classifying sentiment.
There are 5 sentiment classes:
(0 - negative, 1- somewhat negative, 2- neutral, 3- somewhat positive, 4- positive)
Thus, your output should contain 5 logits for each sentence.
'''
### TODO
raise NotImplementedError
return self.linear_layer(self.forward(input_ids, attention_mask))

def predict_paraphrase(self,
input_ids_1, attention_mask_1,
input_ids_2, attention_mask_2):
'''Given a batch of pairs of sentences, outputs a single logit for predicting whether they are paraphrases.
"""
Given a batch of pairs of sentences, outputs a single logit for predicting whether they are paraphrases.
Note that your output should be unnormalized (a logit); it will be passed to the sigmoid function
during evaluation, and handled as a logit by the appropriate loss function.
'''
### TODO
raise NotImplementedError
"""

bert_result_1 = self.forward(input_ids_1, attention_mask_1)
bert_result_2 = self.forward(input_ids_2, attention_mask_2)

diff = torch.cosine_similarity(bert_result_1, bert_result_2)

return self.paraphrase_linear(diff)

def predict_similarity(self,
input_ids_1, attention_mask_1,
input_ids_2, attention_mask_2):
'''Given a batch of pairs of sentences, outputs a single logit corresponding to how similar they are.
"""
Given a batch of pairs of sentences, outputs a single logit corresponding to how similar they are.
Note that your output should be unnormalized (a logit); it will be passed to the sigmoid function
during evaluation, and handled as a logit by the appropriate loss function.
'''
### TODO
raise NotImplementedError
"""

bert_embeddings_1 = self.forward(input_ids_1, attention_mask_1)
bert_embeddings_2 = self.forward(input_ids_2, attention_mask_2)

diff = torch.cosine_similarity(bert_embeddings_1, bert_embeddings_2)

return self.similarity_linear(diff)


def save_model(model, optimizer, args, config, filepath):
@@ -109,6 +128,10 @@ def save_model(model, optimizer, args, config, filepath):

## Currently only trains on sst dataset
def train_multitask(args):
name = datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir=args.logdir + "/multitask_classifier/" + name)
loss_idx_value = 0

device = torch.device('cuda') if args.use_gpu else torch.device('cpu')
# Load data
# Create the data and its corresponding datasets and dataloader
@@ -185,6 +208,7 @@ def train_multitask(args):
sts_loss = F.nll_loss(logits, b_labels.view(-1))

train_loss += sts_loss.item()
writer.add_scalar("Loss/STS/Minibatches", sts_loss.item(), loss_idx_value)
num_batches += 1

# Train on PARAPHRASE dataset
@@ -206,6 +230,7 @@ def train_multitask(args):
para_loss = F.nll_loss(logits, b_labels.view(-1))

train_loss += para_loss.item()
writer.add_scalar("Loss/PARA/Minibatches", para_loss.item(), loss_idx_value)
num_batches += 1

# Train on SST dataset
@@ -222,6 +247,8 @@ def train_multitask(args):
sst_loss = F.cross_entropy(logits, b_labels.view(-1))

train_loss += sst_loss.item()
writer.add_scalar("Loss/SST/Minibatches", sst_loss.item(), loss_idx_value)
loss_idx_value += 1
num_batches += 1

# Calculate gradient and update weights
Expand All @@ -231,15 +258,23 @@ def train_multitask(args):
optimizer.step()

train_loss = train_loss / num_batches
writer.add_scalar("Loss/Epochs", train_loss, epoch)

para_train_acc, _, _, sst_train_acc, _, _, sts_train_acc, _, _ = model_eval_multitask(sst_train_dataloader,
para_train_dataloader,
sts_train_dataloader,
model, device)
writer.add_scalar("para_acc/train/Epochs", para_train_acc, epoch)
writer.add_scalar("sst_acc/train/Epochs", sst_train_acc, epoch)
writer.add_scalar("sts_acc/train/Epochs", sts_train_acc, epoch)

para_dev_acc, _, _, sst_dev_acc, _, _, sts_dev_acc, _, _ = model_eval_multitask(sst_dev_dataloader,
para_dev_dataloader,
sts_dev_dataloader, model,
device)
writer.add_scalar("para_acc/dev/Epochs", para_dev_acc, epoch)
writer.add_scalar("sst_acc/dev/Epochs", sst_dev_acc, epoch)
writer.add_scalar("sts_acc/dev/Epochs", sts_dev_acc, epoch)

if para_dev_acc > best_dev_acc_para and sst_dev_acc > best_dev_acc_sst and sts_dev_acc > best_dev_acc_sts:
best_dev_acc_para = para_dev_acc
Expand All @@ -248,6 +283,9 @@ def train_multitask(args):
save_model(model, optimizer, args, config, args.filepath)
train_acc = sst_train_acc + para_train_acc + sts_train_acc
dev_acc = sst_dev_acc + para_dev_acc + sts_dev_acc

writer.add_scalar("acc/train/Epochs", train_acc, epoch)
writer.add_scalar("acc/dev/Epochs", dev_acc, epoch)
print(
f"Epoch {epoch}: train loss :: {train_loss :.3f}, combined train acc :: {train_acc :.3f}, combined dev acc :: {dev_acc :.3f}")

@@ -296,6 +334,8 @@ def get_args():
parser.add_argument("--sts_dev_out", type=str, default="predictions/sts-dev-output.csv")
parser.add_argument("--sts_test_out", type=str, default="predictions/sts-test-output.csv")

parser.add_argument("--logdir", type=str, default="logdir")

# hyper parameters
parser.add_argument("--batch_size", help='sst: 64, cfimdb: 8 can fit a 12GB GPU', type=int, default=8)
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
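The new predict_paraphrase and predict_similarity methods above both reduce a sentence pair to the cosine similarity of the two pooled BERT embeddings and pass it through a small linear layer to produce one unnormalized logit. A self-contained sketch of that idea (the one-feature nn.Linear(1, 1) head and the 768-dim dummy embeddings are assumptions for illustration, not the repository's exact layers):

    import torch
    from torch import nn

    class PairScorer(nn.Module):
        def __init__(self):
            super().__init__()
            self.head = nn.Linear(1, 1)  # one similarity feature in, one logit out

        def forward(self, emb_1, emb_2):
            # emb_*: (batch, hidden_size) pooled sentence embeddings
            sim = torch.cosine_similarity(emb_1, emb_2, dim=1)  # (batch,)
            return self.head(sim.unsqueeze(-1)).squeeze(-1)     # (batch,) logits

    scorer = PairScorer()
    emb_a, emb_b = torch.randn(4, 768), torch.randn(4, 768)
    print(scorer(emb_a, emb_b).shape)  # torch.Size([4])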
1 change: 1 addition & 0 deletions setup.sh
@@ -11,3 +11,4 @@ pip install filelock==3.0.12
pip install sklearn==0.0
pip install tokenizers==0.13.2
pip install explainaboard_client==0.0.7
pip install tensorboard
