updating glucose conversion

phosseini · Apr 5, 2022 · 341a685 · 341a685
1 parent 04103d4
commit 341a685
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 11 deletions.
diff --git a/config/fine_tuning_config.json b/config/fine_tuning_config.json
@@ -4,25 +4,21 @@
   "tokenizer_name": "bert-large-cased",
   "model_checkpoint": "bert-large-cased",
   "experiment_name": "bert-large-copa",
-  "train_data_path": "data/eval/copa_dev_m.csv",
-  "test_data_path": "data/eval/copa_test_m.csv",
+  "train_data": "data/copa/copa_dev_m.csv",
+  "dev_data": "data/copa/copa_dev_m.csv",
+  "test_data": "data/copa/copa_test_m.csv",
   "tuning_output_path": "models/tuning_output",
+  "running_output_path": "models/running_output",
   "add_prompt_to_test": 0,
-  "learning_rate_range": 0,
   "tuning_num_train_epochs": [4],
   "tuning_batch_size": [4, 8, 16],
+  "learning_rate": 3e-5,
   "tuning_learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5],
-  "tuning_learning_rate_do_range": 1,
   "tuning_learning_rate_start": 2e-5,
   "tuning_learning_rate_end": 5e-5,
-  "running_output_path": "models/running_output",
-  "n_fold": 10,
   "n_trials": 1,
   "max_length": 32,
-  "hyperparameter_search": 1,
-  "cross_validation": 0,
   "num_train_epochs": 4,
-  "learning_rate": 3e-5,
   "batch_size": 16,
   "resources_per_trial": {"cpu": 4, "gpu": 0},
   "random_seeds": [107, 117, 127, 137]

diff --git a/fine_tuning_copa.py b/fine_tuning_copa.py
@@ -44,7 +44,6 @@ def compute_metrics(eval_predictions):
 
 
 def preprocess_function(examples, task=params['task_type'], prompt=params['add_prompt_to_test']):
-    # checking task value:
     if task not in ['seq', 'multi', 'nsp']:
         print("Task value should be one of the following: \'seq\' or \'multi\' or \'nsp\'")
         return

diff --git a/convert_glucose.py b/glucose_to_text.py
@@ -17,7 +17,7 @@ def clean_text(txt):
 df = pd.read_csv('data/glucose/GLUCOSE_training_data_final.csv')
 
 # worker quality ratings range from 1 to 3, with 1 being the lowest and 3 the highest.
-df = df[df['worker_quality_rating'].isin([2, 3])]
+df = df[df['worker_quality_rating'].isin([3])]
 
 # "specific" columns contain the actual sentences from the story
 # general columns are the general patterns of a relation