Commit e27c697
🔀 Merge remote-tracking branch 'upstream/main'
local_files_only not used.

Conflicts:
	classifier.py
	multitask_classifier.py
	utils.py
ItsNiklas committed Jul 13, 2023
2 parents: 7bcead8 + c6b33e8
Showing 5 changed files with 41 additions and 14 deletions.
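The change common to all five files is a new --local_files_only flag that is threaded into every from_pretrained call, so the tokenizer and BERT weights are resolved from the local cache instead of being fetched over the network. A minimal sketch of the pattern, assuming the project's bert.py and tokenizer.py mirror the HuggingFace-style from_pretrained(name, local_files_only=...) signature:

    import argparse

    from bert import BertModel            # project modules, imported the same
    from tokenizer import BertTokenizer   # way as in setup_gwdg.py below

    parser = argparse.ArgumentParser()
    # When set, from_pretrained must find everything in the local cache
    # instead of downloading it from the hub.
    parser.add_argument("--local_files_only", action='store_true')
    args, _ = parser.parse_known_args()

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              local_files_only=args.local_files_only)
    model = BertModel.from_pretrained('bert-base-uncased',
                                      local_files_only=args.local_files_only)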
20 changes: 12 additions & 8 deletions classifier.py
@@ -41,7 +41,7 @@ class BertSentimentClassifier(torch.nn.Module):
     def __init__(self, config):
         super(BertSentimentClassifier, self).__init__()
         self.num_labels = config.num_labels
-        self.bert = BertModel.from_pretrained('bert-base-uncased')
+        self.bert = BertModel.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

         # Pretrain mode does not require updating bert parameters.
         for param in self.bert.parameters():
@@ -67,7 +67,7 @@ class SentimentDataset(Dataset):
     def __init__(self, dataset, args):
         self.dataset = dataset
         self.p = args
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
@@ -105,7 +105,7 @@ class SentimentTestDataset(Dataset):
     def __init__(self, dataset, args):
         self.dataset = dataset
         self.p = args
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
@@ -253,7 +253,8 @@ def train(args):
               'num_labels': num_labels,
               'hidden_size': 768,
               'data_dir': '.',
-              'option': args.option}
+              'option': args.option,
+              'local_files_only': args.local_files_only}

    config = SimpleNamespace(**config)

@@ -356,13 +357,15 @@ def get_args():
                         help='pretrain: the BERT parameters are frozen; finetune: BERT parameters are updated',
                         choices=('pretrain', 'finetune'), default="pretrain")
     parser.add_argument("--use_gpu", action='store_true')
-    parser.add_argument("--dev_out", type=str, default="cfimdb-dev-output.txt")
-    parser.add_argument("--test_out", type=str, default="cfimdb-test-output.txt")

+    parser.add_argument("--logdir", type=str, default="logdir")
+    parser.add_argument("--dev_out", type=str, default="sst-dev-out.csv")
+    parser.add_argument("--test_out", type=str, default="sst-test-out.csv")

-    parser.add_argument("--batch_size", help='sst: 64, cfimdb: 8 can fit a 12GB GPU', type=int, default=8)
+    parser.add_argument("--batch_size", help='sst: 64 can fit a 12GB GPU', type=int, default=64)
     parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
+    parser.add_argument("--local_files_only", action='store_true')

     args, _ = parser.parse_known_args()

@@ -392,7 +395,8 @@ def get_args():
         option=args.option,
         dev_out='predictions/' + args.option + '-sst-dev-out.csv',
         test_out='predictions/' + args.option + '-sst-test-out.csv',
-        logdir=args.logdir
+        logdir=args.logdir,
+        local_files_only=args.local_files_only
     )

     train(config)
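Note how the flag travels: argparse parses it, train() copies it into a plain dict, and SimpleNamespace(**config) turns the dict keys into attributes, so anything handed this namespace can read config.local_files_only. A toy illustration of that config pattern (values are placeholders):

    from types import SimpleNamespace

    # Dict keys become attributes after SimpleNamespace(**config).
    config = {'hidden_size': 768,
              'option': 'finetune',
              'local_files_only': True}
    config = SimpleNamespace(**config)

    assert config.local_files_only is True  # readable downstream as an attribute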
8 changes: 4 additions & 4 deletions datasets.py
@@ -27,7 +27,7 @@ class SentenceClassificationDataset(Dataset):
     def __init__(self, dataset, args):
         self.dataset = dataset
         self.p = args
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
@@ -65,7 +65,7 @@ class SentenceClassificationTestDataset(Dataset):
     def __init__(self, dataset, args):
         self.dataset = dataset
         self.p = args
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
@@ -101,7 +101,7 @@ def __init__(self, dataset, args, isRegression=False):
         self.dataset = dataset
         self.p = args
         self.isRegression = isRegression
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
@@ -157,7 +157,7 @@ class SentencePairTestDataset(Dataset):
     def __init__(self, dataset, args):
         self.dataset = dataset
         self.p = args
-        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)

     def __len__(self):
         return len(self.dataset)
6 changes: 4 additions & 2 deletions multitask_classifier.py
@@ -205,7 +205,8 @@ def train_multitask(args):
               'model': args.model,
               'hidden_size': hidden_size[args.model],
               'data_dir': '.',
-              'option': args.option}
+              'option': args.option,
+              'local_files_only': args.local_files_only}

    config = SimpleNamespace(**config)

@@ -390,11 +391,12 @@ def get_args():
     parser.add_argument("--logdir", type=str, default="logdir")

     # hyper parameters
-    parser.add_argument("--batch_size", help='sst: 64, cfimdb: 8 can fit a 12GB GPU', type=int, default=8)
+    parser.add_argument("--batch_size", help='sst: 64 can fit a 12GB GPU', type=int, default=64)
     parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
     parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
                         default=1e-5)
     parser.add_argument("--checkpoint", type=str, default=None)
+    parser.add_argument("--local_files_only", action='store_true')

     args = parser.parse_args()
     return args
7 changes: 7 additions & 0 deletions setup_gwdg.py
@@ -0,0 +1,7 @@
+from tokenizer import BertTokenizer
+from bert import BertModel
+
+if __name__ == "__main__":
+    # Download files
+    BertTokenizer.from_pretrained('bert-base-uncased')
+    BertModel.from_pretrained('bert-base-uncased')
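The two setup files suggest the intended workflow on the GWDG cluster (an inference from the file names, not stated in the commit): run setup_gwdg.py once on a node with internet access so both downloads land in the local cache, then train with --local_files_only so compute nodes never touch the network. A hedged sketch of the offline path:

    # Sketch only: assumes setup_gwdg.py has already populated the cache.
    from bert import BertModel

    try:
        model = BertModel.from_pretrained('bert-base-uncased', local_files_only=True)
    except Exception as err:  # e.g. a cache miss on an offline compute node
        raise SystemExit(f"Run setup_gwdg.py on a networked node first: {err}")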
14 changes: 14 additions & 0 deletions setup_gwdg.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+module load anaconda3
+conda create -n dnlp-gwdg python=3.8
+source activate dnlp-gwdg
+
+pip install torch==2.0.0+cu117 torchvision==0.15.1+cu117 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu117
+pip install tqdm==4.58.0
+pip install requests==2.25.1
+pip install importlib-metadata==3.7.0
+pip install filelock==3.0.12
+pip install sklearn==0.0
+pip install tokenizers==0.10.1
+pip install explainaboard_client==0.0.7
