
Commit

🧑‍💻 Black
black'ed all files (L100)
ItsNiklas committed Aug 22, 2023
1 parent 5ee9df9 commit cc3559b
Showing 11 changed files with 4,072 additions and 3,672 deletions.
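
The commit applies the Black formatter to the repository, presumably with a line length of 100 (the "(L100)" in the commit message). The exact invocation is not recorded here; the snippet below is a minimal sketch using Black's documented Python API (black.format_str and black.Mode) to reproduce the kind of rewrite visible in the bert.py hunks, where an over-long call is exploded into one argument per line.

import black

# One of the over-long calls from bert.py, written as a single source line (~130 chars).
src = (
    "add_norm_1 = self.add_norm(hidden_states, multi_head, self.attention_dense, "
    "self.attention_dropout, self.attention_layer_norm)\n"
)

# With line_length=100, Black first tries to keep all arguments on one indented
# line; since that still exceeds 100 characters, it falls back to one argument
# per line with a trailing comma, matching the diff below.
print(black.format_str(src, mode=black.Mode(line_length=100)))

On the command line this would correspond to something like black --line-length 100 ., again an assumption about how the commit was actually produced.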
492 changes: 250 additions & 242 deletions base_bert.py

Large diffs are not rendered by default.

34 changes: 24 additions & 10 deletions bert.py
@@ -115,13 +115,19 @@ def forward(self, hidden_states, attention_mask):
# apply multi-head attention
multi_head = self.self_attention(hidden_states, attention_mask)

add_norm_1 = self.add_norm(hidden_states, multi_head, self.attention_dense, self.attention_dropout,
self.attention_layer_norm)
add_norm_1 = self.add_norm(
hidden_states,
multi_head,
self.attention_dense,
self.attention_dropout,
self.attention_layer_norm,
)

feed_forward = self.interm_af(self.interm_dense(add_norm_1))

add_norm_2 = self.add_norm(add_norm_1, feed_forward, self.out_dense, self.out_dropout,
self.out_layer_norm)
add_norm_2 = self.add_norm(
add_norm_1, feed_forward, self.out_dense, self.out_dropout, self.out_layer_norm
)
return add_norm_2
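
Both reformatted calls route a sub-layer output through self.add_norm together with a dense projection, a dropout module, and a layer norm. The helper itself is not part of this diff; the sketch below is only an assumption about its shape (residual connection plus dropout and layer normalization), not code from this repository.

def add_norm(self, residual, sublayer_out, dense, dropout, layer_norm):
    # Project the sub-layer output, regularize it, add the residual input,
    # and normalize the sum (the standard Transformer "add & norm" step).
    return layer_norm(residual + dropout(dense(sublayer_out)))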


@@ -139,17 +145,21 @@ def __init__(self, config):
self.config = config

# embedding
self.word_embedding = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
self.word_embedding = nn.Embedding(
config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
)
self.pos_embedding = nn.Embedding(config.max_position_embeddings, config.hidden_size)
self.tk_type_embedding = nn.Embedding(config.type_vocab_size, config.hidden_size)
self.embed_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
self.embed_dropout = nn.Dropout(config.hidden_dropout_prob)
# position_ids (1, len position emb) is a constant, register to buffer
position_ids = torch.arange(config.max_position_embeddings).unsqueeze(0)
self.register_buffer('position_ids', position_ids)
self.register_buffer("position_ids", position_ids)

# bert encoder
self.bert_layers = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])
self.bert_layers = nn.ModuleList(
[BertLayer(config) for _ in range(config.num_hidden_layers)]
)

# for [CLS] token
self.pooler_dense = nn.Linear(config.hidden_size, config.hidden_size)
@@ -174,7 +184,9 @@ def embed(self, input_ids):
tk_type_embeds = self.tk_type_embedding(tk_type_ids)

# Add three embeddings together; then apply embed_layer_norm and dropout and return.
return self.embed_dropout(self.embed_layer_norm(inputs_embeds + pos_embeds + tk_type_embeds))
return self.embed_dropout(
self.embed_layer_norm(inputs_embeds + pos_embeds + tk_type_embeds)
)

def encode(self, hidden_states, attention_mask):
"""
@@ -184,7 +196,9 @@ def encode(self, hidden_states, attention_mask):
# get the extended attention mask for self attention
# returns extended_attention_mask of [batch_size, 1, 1, seq_len]
# non-padding tokens with 0 and padding tokens with a large negative number
extended_attention_mask: torch.Tensor = get_extended_attention_mask(attention_mask, self.dtype)
extended_attention_mask: torch.Tensor = get_extended_attention_mask(
attention_mask, self.dtype
)

# pass the hidden states through the encoder layers
for i, layer_module in enumerate(self.bert_layers):
@@ -209,4 +223,4 @@ def forward(self, input_ids, attention_mask):
first_tk = self.pooler_dense(first_tk)
first_tk = self.pooler_af(first_tk)

return {'last_hidden_state': sequence_output, 'pooler_output': first_tk}
return {"last_hidden_state": sequence_output, "pooler_output": first_tk}
78 changes: 52 additions & 26 deletions classifier.py
@@ -12,6 +12,7 @@
from torch.utils.tensorboard import SummaryWriter

from AttentionLayer import AttentionLayer

# change it with respect to the original model
from tokenizer import BertTokenizer
from bert import BertModel
@@ -43,7 +44,9 @@ class BertSentimentClassifier(torch.nn.Module):
def __init__(self, config):
super(BertSentimentClassifier, self).__init__()
self.num_labels = config.num_labels
self.bert = BertModel.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)
self.bert = BertModel.from_pretrained(
"bert-base-uncased", local_files_only=args.local_files_only
)

# Pretrain mode does not require updating BERT parameters.
for param in self.bert.parameters():
@@ -66,15 +69,17 @@ def forward(self, input_ids, attention_mask):

# No Dropout because it is the last layer before softmax, else worse performance
result = self.bert(input_ids, attention_mask)
attention_result = self.attention_layer(result['last_hidden_state'])
attention_result = self.attention_layer(result["last_hidden_state"])
return self.linear_layer(attention_result)


class SentimentDataset(Dataset):
def __init__(self, dataset, args):
self.dataset = dataset
self.p = args
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)
self.tokenizer = BertTokenizer.from_pretrained(
"bert-base-uncased", local_files_only=args.local_files_only
)

def __len__(self):
return len(self.dataset)
@@ -112,7 +117,9 @@ class SentimentTestDataset(Dataset):
def __init__(self, dataset, args):
self.dataset = dataset
self.p = args
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', local_files_only=args.local_files_only)
self.tokenizer = BertTokenizer.from_pretrained(
"bert-base-uncased", local_files_only=args.local_files_only
)

def __len__(self):
return len(self.dataset)
@@ -250,7 +257,7 @@ def save_model(model, optimizer, args, config, filepath):
def train(args):
loss_idx_value = 0

device = torch.device('cuda') if args.use_gpu else torch.device('cpu')
device = torch.device("cuda") if args.use_gpu else torch.device("cpu")
# Load data
# Create the data and its corresponding datasets and dataloader
train_data, num_labels = load_data(args.train, "train")
@@ -267,15 +274,21 @@ def train(args):
)

# Init model
config = {'hidden_dropout_prob': args.hidden_dropout_prob,
'num_labels': num_labels,
'hidden_size': 768,
'data_dir': '.',
'option': args.option,
'local_files_only': args.local_files_only}
config = {
"hidden_dropout_prob": args.hidden_dropout_prob,
"num_labels": num_labels,
"hidden_size": 768,
"data_dir": ".",
"option": args.option,
"local_files_only": args.local_files_only,
}

config = SimpleNamespace(**config)
ctx = nullcontext() if not args.use_gpu else torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16)
ctx = (
nullcontext()
if not args.use_gpu
else torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16)
)

model = BertSentimentClassifier(config)
model = model.to(device)
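
The new ctx is a bfloat16 autocast context on CUDA and a plain nullcontext() on CPU. The training loop that consumes it sits in hunks that are not rendered here, so the following is only a sketch of how such a context is typically wrapped around the forward pass and loss in classifier.py; the batch keys (token_ids, attention_mask, labels) and the cross-entropy loss are assumptions, not taken from this file.

for batch in train_dataloader:
    input_ids = batch["token_ids"].to(device)  # assumed key names
    attention_mask = batch["attention_mask"].to(device)
    labels = batch["labels"].to(device)

    optimizer.zero_grad(set_to_none=True)
    with ctx:  # bfloat16 autocast on CUDA, no-op on CPU
        logits = model(input_ids, attention_mask)
        loss = torch.nn.functional.cross_entropy(logits, labels)
    # bfloat16 autocast needs no GradScaler (that is only required for float16).
    loss.backward()
    optimizer.step()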
@@ -284,15 +297,19 @@
if args.optimizer == "adamw":
optimizer = AdamW(model.parameters(), lr=lr)
elif args.optimizer == "sophiag":
optimizer = SophiaG(model.parameters(), lr=lr, eps=1e-12, rho=0.03, betas=(0.985, 0.99), weight_decay=2e-1)
optimizer = SophiaG(
model.parameters(), lr=lr, eps=1e-12, rho=0.03, betas=(0.985, 0.99), weight_decay=2e-1
)

hess_interval = 10
iter_num = 0

best_dev_acc = 0

# Initialize the tensorboard writer
name = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-lr={lr}-optimizer={type(optimizer).__name__}"
name = (
f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-lr={lr}-optimizer={type(optimizer).__name__}"
)
writer = SummaryWriter(log_dir=args.logdir + "/classifier/" + name)

# Run for the specified number of epochs
@@ -361,7 +378,8 @@ def train(args):
save_model(model, optimizer, args, config, args.filepath)

print(
f"Epoch {epoch}: train loss :: {train_loss :.3f}, train acc :: {train_acc :.3f}, dev acc :: {dev_acc :.3f}")
f"Epoch {epoch}: train loss :: {train_loss :.3f}, train acc :: {train_acc :.3f}, dev acc :: {dev_acc :.3f}"
)
writer.close()


@@ -415,25 +433,33 @@ def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=11711)
parser.add_argument("--epochs", type=int, default=10)
parser.add_argument("--option", type=str,
help='pretrain: the BERT parameters are frozen; finetune: BERT parameters are updated',
choices=('pretrain', 'finetune'), default="pretrain")
parser.add_argument("--use_gpu", action='store_true')
parser.add_argument(
"--option",
type=str,
help="pretrain: the BERT parameters are frozen; finetune: BERT parameters are updated",
choices=("pretrain", "finetune"),
default="pretrain",
)
parser.add_argument("--use_gpu", action="store_true")

parser.add_argument("--logdir", type=str, default="logdir")
parser.add_argument("--dev_out", type=str, default="sst-dev-out.csv")
parser.add_argument("--test_out", type=str, default="sst-test-out.csv")

parser.add_argument("--batch_size", help='sst: 64 can fit a 12GB GPU', type=int, default=64)
parser.add_argument("--batch_size", help="sst: 64 can fit a 12GB GPU", type=int, default=64)
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
parser.add_argument("--optimizer", type=str, default="adamw")
parser.add_argument("--local_files_only", action='store_true')
parser.add_argument("--local_files_only", action="store_true")

args, _ = parser.parse_known_args()

# TODO: Possibly change defaults based on optimizer
parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
default=1e-5 if args.option == 'finetune' else 1e-3)
parser.add_argument(
"--lr",
type=float,
help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
default=1e-5 if args.option == "finetune" else 1e-3,
)

args = parser.parse_args()
return args
@@ -456,11 +482,11 @@ def get_args():
dev="data/ids-sst-dev.csv",
test="data/ids-sst-test-student.csv",
option=args.option,
dev_out='predictions/' + args.option + '-sst-dev-out.csv',
test_out='predictions/' + args.option + '-sst-test-out.csv',
dev_out="predictions/" + args.option + "-sst-dev-out.csv",
test_out="predictions/" + args.option + "-sst-test-out.csv",
logdir=args.logdir,
optimizer=args.optimizer,
local_files_only=args.local_files_only
local_files_only=args.local_files_only,
)

train(config)
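
train() also sets hess_interval = 10 and iter_num = 0 next to the SophiaG optimizer, which points to Sophia's periodic Hessian refresh; the loop that performs it is not in the rendered hunks. The sketch below follows the reference SophiaG recipe (labels sampled from the model's own logits, then optimizer.update_hessian()); treat every detail as an assumption rather than this repository's code.

# Inside the training loop, once every hess_interval steps:
if iter_num % hess_interval == hess_interval - 1:
    with ctx:
        logits = model(input_ids, attention_mask)
    # Gauss-Newton-Bartlett estimate: sample labels from the model's softmax,
    # backprop that loss, and let the optimizer accumulate its Hessian proxy.
    sampled = torch.distributions.Categorical(logits=logits).sample()
    samp_loss = torch.nn.functional.cross_entropy(logits, sampled)
    samp_loss.backward()
    optimizer.update_hessian()
    optimizer.zero_grad(set_to_none=True)
iter_num += 1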
(Diffs for the remaining 8 changed files are not loaded here.)
