Skip to content

Commit

Permalink
🧑‍💻 Import Ordering, Some Flake fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ItsNiklas committed Aug 22, 2023
1 parent cc3559b commit 6341263
Show file tree
Hide file tree
Showing 16 changed files with 149 additions and 133 deletions.
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,9 @@ repos:
- id: flake8
#additional_dependencies: [flake8-import-order]
args: ['--select=F,E9']
- repo: https://github.com/asottile/reorder-python-imports
rev: v3.10.0
hooks:
- id: reorder-python-imports
args: ['--py38-plus']

8 changes: 6 additions & 2 deletions base_bert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import re
from torch import device, dtype
from config import BertConfig, PretrainedConfig

from torch import device
from torch import dtype

from config import BertConfig
from config import PretrainedConfig
from utils import *


Expand Down
9 changes: 8 additions & 1 deletion bert.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
from typing import Dict, List, Optional, Union, Tuple, Callable
import math
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from base_bert import BertPreTrainedModel
from utils import *

Expand Down
21 changes: 13 additions & 8 deletions classifier.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
import time, random, numpy as np, argparse, sys, re, os
import argparse
import csv
import random
from contextlib import nullcontext
from datetime import datetime
from types import SimpleNamespace
import csv

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, f1_score, recall_score, accuracy_score
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from AttentionLayer import AttentionLayer
from bert import BertModel
from optimizer import AdamW
from optimizer import SophiaG
from tokenizer import BertTokenizer

# change it with respect to the original model
from tokenizer import BertTokenizer
from bert import BertModel
from optimizer import AdamW, SophiaG
from tqdm import tqdm

TQDM_DISABLE = False

Expand Down
17 changes: 11 additions & 6 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from typing import Union, Tuple, Dict, Any, Optional
import os
import json
from collections import OrderedDict
import torch
from utils import CONFIG_NAME, hf_bucket_url, cached_path, is_remote_url
import os
from typing import Any
from typing import Dict
from typing import Tuple
from typing import Union

from utils import cached_path
from utils import CONFIG_NAME
from utils import hf_bucket_url
from utils import is_remote_url


class PretrainedConfig(object):
Expand Down Expand Up @@ -168,7 +173,7 @@ def get_config_dict(
# Load config dict
config_dict = cls._dict_from_json_file(resolved_config_file)

except EnvironmentError as err:
except EnvironmentError:
msg = (
f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
Expand Down
12 changes: 5 additions & 7 deletions datasets.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
#!/usr/bin/env python3

"""
This module contains our Dataset classes and functions to load the 3 datasets we're using.
You should only need to call load_multitask_data to get the training and dev examples
to train your model.
"""

import csv
import random

import torch
from torch.utils.data import Dataset

from tokenizer import BertTokenizer
from random import randrange
import random


def preprocess_string(s):
Expand Down Expand Up @@ -263,9 +261,9 @@ def collate_fn(self, all_data):


def load_multitask_test_data():
paraphrase_filename = f"data/quora-test.csv"
sentiment_filename = f"data/ids-sst-test.txt"
similarity_filename = f"data/sts-test.csv"
paraphrase_filename = "data/quora-test.csv"
sentiment_filename = "data/ids-sst-test.txt"
similarity_filename = "data/sts-test.csv"

sentiment_data = []

Expand Down
48 changes: 21 additions & 27 deletions evaluation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3

"""
Model evaluation functions.
Expand All @@ -14,21 +13,18 @@
so unless you change it you shouldn't need to call anything from here
explicitly aside from model_eval_multitask.
"""

import numpy as np
import torch
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, f1_score, recall_score, accuracy_score
from tqdm import tqdm
import numpy as np

from datasets import (
load_multitask_data,
load_multitask_test_data,
SentenceClassificationDataset,
SentenceClassificationTestDataset,
SentencePairDataset,
SentencePairTestDataset,
)
from datasets import load_multitask_data
from datasets import SentenceClassificationDataset
from datasets import SentenceClassificationTestDataset
from datasets import SentencePairDataset
from datasets import SentencePairTestDataset

TQDM_DISABLE = False

Expand All @@ -40,7 +36,7 @@ def model_eval_sst(dataloader, model, device):
y_pred = []
sents = []
sent_ids = []
for step, batch in enumerate(tqdm(dataloader, desc=f"eval", disable=TQDM_DISABLE)):
for step, batch in enumerate(tqdm(dataloader, desc="eval", disable=TQDM_DISABLE)):
b_ids, b_mask, b_labels, b_sents, b_sent_ids = (
batch["token_ids"],
batch["attention_mask"],
Expand Down Expand Up @@ -82,7 +78,7 @@ def model_eval_multitask(
# Evaluate paraphrase detection.
if paraphrase_dataloader:
for step, batch in enumerate(
tqdm(paraphrase_dataloader, desc=f"eval", disable=TQDM_DISABLE)
tqdm(paraphrase_dataloader, desc="eval", disable=TQDM_DISABLE)
):
(b_ids1, b_mask1, b_ids2, b_mask2, b_labels, b_sent_ids) = (
batch["token_ids_1"],
Expand Down Expand Up @@ -116,7 +112,7 @@ def model_eval_multitask(

# Evaluate semantic textual similarity.
if sts_dataloader:
for step, batch in enumerate(tqdm(sts_dataloader, desc=f"eval", disable=TQDM_DISABLE)):
for step, batch in enumerate(tqdm(sts_dataloader, desc="eval", disable=TQDM_DISABLE)):
(b_ids1, b_mask1, b_ids2, b_mask2, b_labels, b_sent_ids) = (
batch["token_ids_1"],
batch["attention_mask_1"],
Expand Down Expand Up @@ -150,7 +146,7 @@ def model_eval_multitask(
# Evaluate sentiment classification.
if sentiment_dataloader:
for step, batch in enumerate(
tqdm(sentiment_dataloader, desc=f"eval", disable=TQDM_DISABLE)
tqdm(sentiment_dataloader, desc="eval", disable=TQDM_DISABLE)
):
b_ids, b_mask, b_labels, b_sent_ids = (
batch["token_ids"],
Expand Down Expand Up @@ -205,7 +201,7 @@ def model_eval_test_multitask(
para_sent_ids = []
# Evaluate paraphrase detection.
for step, batch in enumerate(
tqdm(paraphrase_dataloader, desc=f"eval", disable=TQDM_DISABLE)
tqdm(paraphrase_dataloader, desc="eval", disable=TQDM_DISABLE)
):
(b_ids1, b_mask1, b_ids2, b_mask2, b_sent_ids) = (
batch["token_ids_1"],
Expand All @@ -230,7 +226,7 @@ def model_eval_test_multitask(
sts_sent_ids = []

# Evaluate semantic textual similarity.
for step, batch in enumerate(tqdm(sts_dataloader, desc=f"eval", disable=TQDM_DISABLE)):
for step, batch in enumerate(tqdm(sts_dataloader, desc="eval", disable=TQDM_DISABLE)):
(b_ids1, b_mask1, b_ids2, b_mask2, b_sent_ids) = (
batch["token_ids_1"],
batch["attention_mask_1"],
Expand All @@ -254,9 +250,7 @@ def model_eval_test_multitask(
sst_sent_ids = []

# Evaluate sentiment classification.
for step, batch in enumerate(
tqdm(sentiment_dataloader, desc=f"eval", disable=TQDM_DISABLE)
):
for step, batch in enumerate(tqdm(sentiment_dataloader, desc="eval", disable=TQDM_DISABLE)):
b_ids, b_mask, b_sent_ids = (
batch["token_ids"],
batch["attention_mask"],
Expand Down Expand Up @@ -347,33 +341,33 @@ def test_model_multitask(args, model, device):

with open(args.sst_dev_out, "w+") as f:
print(f"dev sentiment acc :: {dev_sentiment_accuracy :.3f}")
f.write(f"id \t Predicted_Sentiment \n")
f.write("id \t Predicted_Sentiment \n")
for p, s in zip(dev_sst_sent_ids, dev_sst_y_pred):
f.write(f"{p} , {s} \n")

with open(args.sst_test_out, "w+") as f:
f.write(f"id \t Predicted_Sentiment \n")
f.write("id \t Predicted_Sentiment \n")
for p, s in zip(test_sst_sent_ids, test_sst_y_pred):
f.write(f"{p} , {s} \n")

with open(args.para_dev_out, "w+") as f:
print(f"dev paraphrase acc :: {dev_paraphrase_accuracy :.3f}")
f.write(f"id \t Predicted_Is_Paraphrase \n")
f.write("id \t Predicted_Is_Paraphrase \n")
for p, s in zip(dev_para_sent_ids, dev_para_y_pred):
f.write(f"{p} , {s} \n")

with open(args.para_test_out, "w+") as f:
f.write(f"id \t Predicted_Is_Paraphrase \n")
f.write("id \t Predicted_Is_Paraphrase \n")
for p, s in zip(test_para_sent_ids, test_para_y_pred):
f.write(f"{p} , {s} \n")

with open(args.sts_dev_out, "w+") as f:
print(f"dev sts corr :: {dev_sts_corr :.3f}")
f.write(f"id \t Predicted_Similiary \n")
f.write("id \t Predicted_Similiary \n")
for p, s in zip(dev_sts_sent_ids, dev_sts_y_pred):
f.write(f"{p} , {s} \n")

with open(args.sts_test_out, "w+") as f:
f.write(f"id \t Predicted_Similiary \n")
f.write("id \t Predicted_Similiary \n")
for p, s in zip(test_sts_sent_ids, test_sts_y_pred):
f.write(f"{p} , {s} \n")
37 changes: 16 additions & 21 deletions multitask_classifier.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,30 @@
import argparse
import os
import random
import subprocess
import sys
from contextlib import nullcontext
import itertools
from pprint import pformat
import random, argparse, sys, os, subprocess
from datetime import datetime
from pprint import pformat
from types import SimpleNamespace

import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from bert import BertModel
from AttentionLayer import AttentionLayer
from optimizer import AdamW, SophiaH
from tqdm import tqdm

from datasets import (
SentenceClassificationDataset,
SentencePairDataset,
load_multitask_data,
load_multitask_test_data,
)

from evaluation import model_eval_sst, test_model_multitask, model_eval_multitask
from AttentionLayer import AttentionLayer
from bert import BertModel
from datasets import load_multitask_data
from datasets import SentenceClassificationDataset
from datasets import SentencePairDataset
from evaluation import model_eval_multitask
from evaluation import test_model_multitask
from optimizer import AdamW
from optimizer import SophiaH

TQDM_DISABLE = False

Expand Down Expand Up @@ -171,10 +170,6 @@ def load_model(filepath, model, optimizer, use_gpu):

## Currently only trains on sst dataset
def train_multitask(args):
loss_sst_idx_value = 0
loss_sts_idx_value = 0
loss_para_idx_value = 0

train_all_datasets = True
n_datasets = args.sst + args.sts + args.para
if args.sst or args.sts or args.para:
Expand Down
4 changes: 3 additions & 1 deletion optimizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Callable, Iterable, Tuple
from typing import Callable
from typing import Iterable
from typing import Tuple

import torch
from torch.optim import Optimizer
Expand Down
3 changes: 2 additions & 1 deletion optimizer_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import torch
import numpy as np
import torch

from optimizer import AdamW

seed = 0
Expand Down
3 changes: 0 additions & 3 deletions prepare_submit.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Creates a zip file for submission on Gradescope.

import os
import sys
import zipfile

required_files = [p for p in os.listdir(".") if p.endswith(".py")] + [
Expand All @@ -11,7 +9,6 @@

def main():
aid = "dnlp_final_project_submission"
path = os.getcwd()
with zipfile.ZipFile(f"{aid}.zip", "w") as zz:
for file in required_files:
zz.write(file, os.path.join(".", file))
Expand Down
1 change: 1 addition & 0 deletions sanity_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import torch

from bert import BertModel

sanity_data = torch.load("./sanity_check.data")
Expand Down
2 changes: 1 addition & 1 deletion setup_gwdg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tokenizer import BertTokenizer
from bert import BertModel
from tokenizer import BertTokenizer

if __name__ == "__main__":
# Download files
Expand Down
3 changes: 2 additions & 1 deletion sophia_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import torch
import numpy as np
import torch

from optimizer import SophiaG

seed = 0
Expand Down
Loading

0 comments on commit 6341263

Please sign in to comment.