Skip to content

Commit

Permalink
try cpu on finetuning
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris committed May 26, 2024
1 parent 16c2e59 commit 67cc12a
Showing 1 changed file with 19 additions and 15 deletions.
34 changes: 19 additions & 15 deletions nkululeko/models/model_tuned.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@
import pickle
import typing

import audeer
import audiofile
import audmetric
import datasets
import numpy as np
import pandas as pd
import torch
import transformers
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel

import audeer
import audiofile
import audmetric
from transformers.models.wav2vec2.modeling_wav2vec2 import (
Wav2Vec2Model,
Wav2Vec2PreTrainedModel,
)

import nkululeko.glob_conf as glob_conf
from nkululeko.models.model import Model as BaseModel
Expand All @@ -37,12 +38,13 @@ def __init__(self, df_train, df_test, feats_train, feats_test):
self.target = glob_conf.config["DATA"]["target"]
labels = glob_conf.labels
self.class_num = len(labels)
device = self.util.config_val("MODEL", "device", "cpu")
# device = self.util.config_val("MODEL", "device", "cpu")
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.batch_size = int(self.util.config_val("MODEL", "batch_size", "8"))
if device != "cpu":
if self.device != "cpu":
self.util.debug(f"running on device {device}")
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = device
os.environ["CUDA_VISIBLE_DEVICES"] = self.device
self.df_train, self.df_test = df_train, df_test
self.epoch_num = int(self.util.config_val("EXP", "epochs", 1))

Expand Down Expand Up @@ -79,7 +81,9 @@ def _init_model(self):
le = glob_conf.label_encoder
mapping = dict(zip(le.classes_, range(len(le.classes_))))
target_mapping = {k: int(v) for k, v in mapping.items()}
target_mapping_reverse = {value: key for key, value in target_mapping.items()}
target_mapping_reverse = {
value: key for key,
value in target_mapping.items()}

self.config = transformers.AutoConfig.from_pretrained(
model_path,
Expand Down Expand Up @@ -207,19 +211,17 @@ def train(self):
train_weights /= train_weights.sum()
self.util.debug("train weights: {train_weights}")
criterion = torch.nn.CrossEntropyLoss(
weight=torch.Tensor(train_weights).to("cuda"),
weight=torch.Tensor(train_weights).to(self.device),
)
# criterion = torch.nn.CrossEntropyLoss()

class Trainer(transformers.Trainer):

def compute_loss(
self,
model,
inputs,
return_outputs=False,
):

targets = inputs.pop("labels").squeeze()
targets = targets.type(torch.long)

Expand All @@ -246,7 +248,7 @@ def compute_loss(
gradient_accumulation_steps=self.accumulation_steps,
evaluation_strategy="steps",
num_train_epochs=self.epoch_num,
fp16=True,
fp16=self.device == "cuda",
save_steps=num_steps,
eval_steps=num_steps,
logging_steps=num_steps,
Expand Down Expand Up @@ -462,7 +464,9 @@ def forward(
mean = input_values.mean()

# var = input_values.var()
# raises: onnxruntime.capi.onnxruntime_pybind11_state.NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for the node ReduceProd_3:ReduceProd(11)
# raises: onnxruntime.capi.onnxruntime_pybind11_state.NotImplemented:
# [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an
# implementation for the node ReduceProd_3:ReduceProd(11)

var = torch.square(input_values - mean).mean()
input_values = (input_values - mean) / torch.sqrt(var + 1e-7)
Expand Down

0 comments on commit 67cc12a

Please sign in to comment.