Merge branch 'main' into 'feat/tensorboard'
# Conflicts:
#   .gitignore
#   multitask_classifier.py
ItsNiklas committed Jun 25, 2023
2 parents 9804eb7 + 8549c5a commit 5348a65
Showing 6 changed files with 54 additions and 26 deletions.
2 changes: 2 additions & 0 deletions base_bert.py
@@ -110,6 +110,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
# Instantiate model.
model = cls(config, *model_args, **model_kwargs)

+ print("Loading checkpoint weights from", resolved_archive_file, "...")
if state_dict is None:
try:
state_dict = torch.load(resolved_archive_file, map_location="cpu")
@@ -118,6 +119,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
f"Unable to load weights from pytorch checkpoint file for '{pretrained_model_name_or_path}' "
f"at '{resolved_archive_file}'"
)
+ print("Loaded weights.")

missing_keys = []
unexpected_keys = []
13 changes: 7 additions & 6 deletions classifier.py
@@ -50,10 +50,8 @@ def __init__(self, config):
elif config.option == 'finetune':
param.requires_grad = True

- ### TODO
- self.dropout = nn.Dropout(config.hidden_dropout_prob)
+ # self.dropout = nn.Dropout(config.hidden_dropout_prob)
self.linear_layer = nn.Linear(config.hidden_size, self.num_labels)
- # raise NotImplementedError

def forward(self, input_ids, attention_mask):
'''Takes a batch of sentences and returns logits for sentiment classes'''
@@ -62,7 +60,7 @@ def forward(self, input_ids, attention_mask):
# the training loop currently uses F.cross_entropy as the loss function.
# Cross entropy already has a softmax therefore this should be okay
result = self.bert(input_ids, attention_mask)
- return self.linear_layer(self.dropout(result['pooler_output']))
+ return self.linear_layer(result['pooler_output'])


class SentimentDataset(Dataset):
@@ -364,9 +362,12 @@ def get_args():

parser.add_argument("--batch_size", help='sst: 64, cfimdb: 8 can fit a 12GB GPU', type=int, default=8)
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
- parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
- default=1e-5)

+ args, _ = parser.parse_known_args()
+
+ parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
+ default=1e-5 if args.option == 'finetune' else 1e-3)

args = parser.parse_args()
return args

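A note on the get_args() change above: parsing now happens in two passes so the --lr default can depend on --option. A minimal, self-contained sketch of the pattern (assuming --option is defined as elsewhere in get_args; all other arguments omitted):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--option", type=str, choices=("pretrain", "finetune"), default="pretrain")

# First pass: parse_known_args tolerates flags that are not defined yet (such as --lr),
# so the value of --option is available before the learning-rate default is chosen.
args, _ = parser.parse_known_args()

parser.add_argument("--lr", type=float,
                    help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
                    default=1e-5 if args.option == 'finetune' else 1e-3)

# Second pass: parse everything with the option-dependent default in place.
args = parser.parse_args()
print(args.option, args.lr)

Running the sketch with --option pretrain prints 0.001, with --option finetune it prints 1e-05, matching the help text.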
41 changes: 29 additions & 12 deletions multitask_classifier.py
@@ -55,43 +55,60 @@ def __init__(self, config):
elif config.option == 'finetune':
param.requires_grad = True

+ self.linear_layer = nn.Linear(config.hidden_size, config.num_labels)
+
+ self.paraphrase_linear = nn.Linear(config.hidden_size, 1)
+ self.similarity_linear = nn.Linear(config.hidden_size, 1)

def forward(self, input_ids, attention_mask):
'Takes a batch of sentences and produces embeddings for them.'
# The final BERT embedding is the hidden state of [CLS] token (the first token)
# Here, you can start by just returning the embeddings straight from BERT.
# When thinking of improvements, you can later try modifying this
# (e.g., by adding other layers).
- ### TODO
- raise NotImplementedError

+ result = self.bert(input_ids, attention_mask)
+ return result['pooler_output']

def predict_sentiment(self, input_ids, attention_mask):
'''Given a batch of sentences, outputs logits for classifying sentiment.
There are 5 sentiment classes:
(0 - negative, 1- somewhat negative, 2- neutral, 3- somewhat positive, 4- positive)
Thus, your output should contain 5 logits for each sentence.
'''
- ### TODO
- raise NotImplementedError
+ return self.linear_layer(self.forward(input_ids, attention_mask))

def predict_paraphrase(self,
input_ids_1, attention_mask_1,
input_ids_2, attention_mask_2):
- '''Given a batch of pairs of sentences, outputs a single logit for predicting whether they are paraphrases.
+ """
+ Given a batch of pairs of sentences, outputs a single logit for predicting whether they are paraphrases.
Note that your output should be unnormalized (a logit); it will be passed to the sigmoid function
during evaluation, and handled as a logit by the appropriate loss function.
- '''
- ### TODO
- raise NotImplementedError
+ """

+ bert_result_1 = self.forward(input_ids_1, attention_mask_1)
+ bert_result_2 = self.forward(input_ids_2, attention_mask_2)
+
+ diff = torch.cosine_similarity(bert_result_1, bert_result_2)
+
+ return self.paraphrase_linear(diff)

def predict_similarity(self,
input_ids_1, attention_mask_1,
input_ids_2, attention_mask_2):
- '''Given a batch of pairs of sentences, outputs a single logit corresponding to how similar they are.
+ """
+ Given a batch of pairs of sentences, outputs a single logit corresponding to how similar they are.
Note that your output should be unnormalized (a logit); it will be passed to the sigmoid function
during evaluation, and handled as a logit by the appropriate loss function.
- '''
- ### TODO
- raise NotImplementedError
+ """

+ bert_embeddings_1 = self.forward(input_ids_1, attention_mask_1)
+ bert_embeddings_2 = self.forward(input_ids_2, attention_mask_2)
+
+ diff = torch.cosine_similarity(bert_embeddings_1, bert_embeddings_2)
+
+ return self.similarity_linear(diff)


def save_model(model, optimizer, args, config, filepath):
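The paraphrase and similarity heads added above both encode each sentence with the shared forward() (the pooled [CLS] embedding), take the cosine similarity of the two embeddings, and feed the result to a small linear layer to get an unnormalized logit. A rough standalone sketch of that idea (names are illustrative; since cosine similarity yields one scalar per pair, this sketch maps it through a 1-to-1 linear head):

import torch
import torch.nn as nn

class PairHead(nn.Module):
    # Scores a sentence pair from two pooled BERT embeddings.
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)  # cosine similarity is a single scalar per pair

    def forward(self, emb_1, emb_2):
        # emb_1, emb_2: (batch, hidden_size) pooler outputs
        sim = torch.cosine_similarity(emb_1, emb_2, dim=1)  # (batch,)
        return self.linear(sim.unsqueeze(-1)).squeeze(-1)   # (batch,) unnormalized logits

head = PairHead()
e1, e2 = torch.randn(4, 768), torch.randn(4, 768)  # stand-ins for pooler outputs
logits = head(e1, e2)  # pass to torch.sigmoid or BCEWithLogitsLoss downstream
print(logits.shape)    # torch.Size([4])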
4 changes: 2 additions & 2 deletions run_train.sh
@@ -14,7 +14,7 @@

module load anaconda3
module load cuda
- source activate dl-gpu # Or whatever you called your environment.
+ source activate dnlp2 # Or whatever you called your environment.

# Printing out some info.
echo "Submitting job with sbatch from directory: ${SLURM_SUBMIT_DIR}"
@@ -28,4 +28,4 @@ python -m torch.utils.collect_env
nvcc -V

# Run the script:
- python -u multitask_classifier.py --use_gpu
+ python -u multitask_classifier.py --use_gpu
8 changes: 4 additions & 4 deletions setup.sh
@@ -1,14 +1,14 @@
#!/usr/bin/env bash

- conda create -n dnlp python=3.8
- conda activate dnlp
+ conda create -n dnlp2 python=3.10
+ conda activate dnlp2

- conda install pytorch==1.8.0 torchvision torchaudio cudatoolkit=10.1 -c pytorch
+ conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
pip install tqdm==4.58.0
pip install requests==2.25.1
pip install importlib-metadata==3.7.0
pip install filelock==3.0.12
pip install sklearn==0.0
- pip install tokenizers==0.10.1
+ pip install tokenizers==0.13.2
pip install explainaboard_client==0.0.7
pip install tensorboard
12 changes: 10 additions & 2 deletions utils.py
@@ -1,4 +1,6 @@
from typing import Dict, List, Optional, Union, Tuple, BinaryIO
+ import fnmatch
+ import socket
import os
import sys
import json
@@ -145,7 +147,7 @@ def get_from_cache(

url_to_download = url
etag = None
- if not local_files_only:
+ if not 'gpu' in socket.gethostname() and not local_files_only:
try:
r = requests.head(url, headers=headers, allow_redirects=False, proxies=proxies, timeout=etag_timeout)
r.raise_for_status()
@@ -205,6 +207,12 @@ def get_from_cache(
if os.path.exists(cache_path) and not force_download:
return cache_path

+ if 'gpu' in socket.gethostname():
+ raise FileNotFoundError(
+ "Cannot find the requested files in the cached path and outgoing traffic"
+ " is not enabled."
+ )

# Prevent parallel downloads of the same file with a lock.
lock_path = cache_path + ".lock"
with FileLock(lock_path):
@@ -343,4 +351,4 @@ def get_extended_attention_mask(attention_mask: Tensor, dtype) -> Tensor:
extended_attention_mask = attention_mask[:, None, None, :]
extended_attention_mask = extended_attention_mask.to(dtype=dtype) # fp16 compatibility
extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
- return extended_attention_mask
+ return extended_attention_mask
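The get_from_cache() changes above add an offline guard for the cluster: on nodes whose hostname contains "gpu" there is no outgoing traffic, so the ETag lookup is skipped and only files already in the local cache are accepted. A rough standalone sketch of that pattern (resolve_cached_file and its cache_path argument are illustrative, not the repo's API):

import os
import socket

def resolve_cached_file(cache_path):
    # Compute nodes whose hostname contains "gpu" are assumed to have no internet
    # access, so never attempt a download there -- rely on the local cache only.
    offline = "gpu" in socket.gethostname()

    if os.path.exists(cache_path):
        return cache_path

    if offline:
        raise FileNotFoundError(
            "Cannot find the requested files in the cached path and outgoing traffic is not enabled."
        )

    # Online case: the real get_from_cache() would download and cache the file here.
    raise NotImplementedError("download omitted in this sketch")

On a login node an uncached path falls through to the download branch; on a gpu* node the call fails fast instead of hanging on a network timeout.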
