diff --git a/CHANGELOG.md b/CHANGELOG.md index caa060fe..8544cd30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ Changelog ========= +Version 0.81.6 +-------------- +* updated documentation +* updated crema-d +* updated tests + +Version 0.81.5 +-------------- +* added sex=gender for speaker mappings + Version 0.81.4 -------------- * fixed bug in demo module diff --git a/data/crema-d/README.md b/data/crema-d/README.md new file mode 100644 index 00000000..ac728b36 --- /dev/null +++ b/data/crema-d/README.md @@ -0,0 +1,18 @@ +This folder is to import the Crowd-sourced Emotional Multimodal Actors Dataset (CREMA-D) database to nkululeko. + +Labels are: 'anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness'. + +Based on the [audb version](https://github.com/audeering/crema-d) + +Load the database with Python: +```bash +$ python load_db.py +``` +**inside** the crema-d folder! + +Then, as a test, you might run (from the nkululeko root folder): + +```bash +python -m nkululeko.nkululeko --config data/crema-d/test_age.ini +python -m nkululeko.nkululeko --config data/crema-d/test_emotion.ini +``` diff --git a/data/crema-d/load_db.py b/data/crema-d/load_db.py new file mode 100644 index 00000000..86d97581 --- /dev/null +++ b/data/crema-d/load_db.py @@ -0,0 +1,11 @@ +import os + +import audb + + +# set download directory to current +cwd = os.getcwd() +audb.config.CACHE_ROOT = cwd + +# load the latest version of the data +db = audb.load("crema-d", format="wav", sampling_rate=16000, mixdown=True) diff --git a/data/crema-d/test_age.ini b/data/crema-d/test_age.ini new file mode 100644 index 00000000..9357fea3 --- /dev/null +++ b/data/crema-d/test_age.ini @@ -0,0 +1,22 @@ +[EXP] +root = ./data/crema-d/ +name = results_age +type = regression +[DATA] +databases = ['data'] +data = ./data/crema-d/crema-d/1.3.0/fe182b91/ +data.split_strategy = specified +data.colnames = {'sex':'gender'} +data.files_table = ['files'] +data.test_tables = ['age.test'] +data.train_tables = ['age.dev', 'age.train'] +target = age 
+labels = ['20ies','30ies', '40ies', '50ies', '60ies'] +bins = [-100000, 30, 40, 50, 60, 100000] +[FEATS] +type = ['os'] +scale = standard +[MODEL] +type = svr +C_val = 10 +measure = ccc diff --git a/data/crema-d/test_emotion.ini b/data/crema-d/test_emotion.ini new file mode 100644 index 00000000..c1f518cf --- /dev/null +++ b/data/crema-d/test_emotion.ini @@ -0,0 +1,20 @@ +[EXP] +root = ./data/crema-d/ +name = results_emotion +[DATA] +databases = ['data'] +data = ./data/crema-d/crema-d/1.3.0/fe182b91/ +data.split_strategy = specified +data.colnames = {'sex':'gender'} +data.files_table = ['files'] +data.target_tables = ['emotion.categories.desired.test','emotion.categories.desired.train', 'emotion.categories.desired.dev'] +data.test_tables = ['emotion.categories.desired.test'] +data.train_tables = ['emotion.categories.desired.train', 'emotion.categories.desired.dev'] +target = emotion +labels = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness'] +[FEATS] +type = ['os'] +scale = standard +[MODEL] +type = svm +C_val = 10 diff --git a/nkululeko/constants.py b/nkululeko/constants.py index a1cec53d..6124ca81 100644 --- a/nkululeko/constants.py +++ b/nkululeko/constants.py @@ -1,2 +1,2 @@ -VERSION="0.81.4" +VERSION="0.81.6" SAMPLING_RATE = 16000 diff --git a/nkululeko/data/dataset.py b/nkululeko/data/dataset.py index 87acca4c..8c6a8cec 100644 --- a/nkululeko/data/dataset.py +++ b/nkululeko/data/dataset.py @@ -76,6 +76,7 @@ def _check_cols(self, df): if rename_cols: col_dict = ast.literal_eval(rename_cols) df = df.rename(columns=col_dict) + self.util.debug(f"renamed data columns: {col_dict}") return df def _report_load(self): @@ -281,13 +282,19 @@ def _get_df_for_lists(self, db, df_files): # try to get the age values df_local["age"] = source_df["age"].astype(int) got_age = True - except (KeyError, ValueError, audformat.errors.BadKeyError) as e: + except (KeyError, ValueError, audformat.errors.BadKeyError): pass try: # also it might be possible that the sex is 
part of the speaker description df_local["gender"] = db[table]["speaker"].get(map="gender") got_gender = True - except (ValueError, audformat.errors.BadKeyError) as e: + except (ValueError, audformat.errors.BadKeyError): + pass + try: + # also it might be possible that the sex is part of the speaker description + df_local["gender"] = db[table]["speaker"].get(map="sex") + got_gender = True + except (ValueError, audformat.errors.BadKeyError): pass try: # also it might be possible that the age is part of the speaker description diff --git a/nkululeko/experiment.py b/nkululeko/experiment.py index d66dd359..5ecf041d 100644 --- a/nkululeko/experiment.py +++ b/nkululeko/experiment.py @@ -695,7 +695,7 @@ def save(self, filename): pickle.dump(self.__dict__, f) f.close() except TypeError: - self.feature_extractor.featExtractor.model = None + self.feature_extractor.feat_extractor.model = None f = open(filename, "wb") pickle.dump(self.__dict__, f) f.close() diff --git a/nkululeko/feat_extract/feats_agender.py b/nkululeko/feat_extract/feats_agender.py index 87392196..fc47bf98 100644 --- a/nkululeko/feat_extract/feats_agender.py +++ b/nkululeko/feat_extract/feats_agender.py @@ -32,10 +32,7 @@ def _load_model(self): audeer.extract_archive(archive_path, model_root) device = self.util.config_val("MODEL", "device", "cpu") self.model = audonnx.load(model_root, device=device) - pytorch_total_params = sum(p.numel() for p in self.model.parameters()) - self.util.debug( - f"initialized agender model with {pytorch_total_params} parameters in total" - ) + self.util.debug(f"initialized agender model") self.model_loaded = True def extract(self): diff --git a/nkululeko/feat_extract/feats_audmodel_dim.py b/nkululeko/feat_extract/feats_auddim.py similarity index 93% rename from nkululeko/feat_extract/feats_audmodel_dim.py rename to nkululeko/feat_extract/feats_auddim.py index da7e7eea..e9d3cbab 100644 --- a/nkululeko/feat_extract/feats_audmodel_dim.py +++ b/nkululeko/feat_extract/feats_auddim.py 
@@ -13,11 +13,12 @@ import nkululeko.glob_conf as glob_conf -class AudModelDimSet(Featureset): - """ - Emotional dimensions from the wav2vec2. based model finetuned on MSPPodcast emotions, described in the paper +class AuddimSet(Featureset): + """Emotional dimensions from the wav2vec2 model finetuned on MSPPodcast emotions. + + Described in the paper "Dawn of the transformer era in speech emotion recognition: closing the valence gap" - https://arxiv.org/abs/2203.07378 + https://arxiv.org/abs/2203.07378. """ def __init__(self, name, data_df): diff --git a/nkululeko/feat_extract/feats_audmodel.py b/nkululeko/feat_extract/feats_audmodel.py index f8c5accd..c2f890f4 100644 --- a/nkululeko/feat_extract/feats_audmodel.py +++ b/nkululeko/feat_extract/feats_audmodel.py @@ -11,11 +11,12 @@ from nkululeko.feat_extract.featureset import Featureset -class AudModelSet(Featureset): - """ - Embeddings from the wav2vec2. based model finetuned on MSPPodcast emotions, described in the paper +class AudmodelSet(Featureset): + """Embeddings from the wav2vec2 based model finetuned on MSPPodcast emotions. + + Described in the paper: "Dawn of the transformer era in speech emotion recognition: closing the valence gap" - https://arxiv.org/abs/2203.07378 + https://arxiv.org/abs/2203.07378. 
""" def __init__(self, name, data_df): diff --git a/nkululeko/feat_extract/feats_import.py b/nkululeko/feat_extract/feats_import.py index dfe64382..163f371a 100644 --- a/nkululeko/feat_extract/feats_import.py +++ b/nkululeko/feat_extract/feats_import.py @@ -8,7 +8,7 @@ from nkululeko.feat_extract.featureset import Featureset -class Importset(Featureset): +class ImportSet(Featureset): """Class to import features that have been compiled elsewhere""" def __init__(self, name, data_df): diff --git a/nkululeko/feat_extract/feats_mos.py b/nkululeko/feat_extract/feats_mos.py index b1c091dc..abd8ea2e 100644 --- a/nkululeko/feat_extract/feats_mos.py +++ b/nkululeko/feat_extract/feats_mos.py @@ -10,6 +10,7 @@ pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu """ + import os import pandas as pd from tqdm import tqdm @@ -23,7 +24,7 @@ from nkululeko.feat_extract.featureset import Featureset -class MOSSet(Featureset): +class MosSet(Featureset): """Class to predict MOS (mean opinion score)""" def __init__(self, name, data_df): diff --git a/nkululeko/feat_extract/feats_praat.py b/nkululeko/feat_extract/feats_praat.py index 3aea395d..369e3691 100644 --- a/nkululeko/feat_extract/feats_praat.py +++ b/nkululeko/feat_extract/feats_praat.py @@ -1,17 +1,19 @@ # feats_praat.py -from nkululeko.feat_extract.featureset import Featureset +import ast import os -import pandas as pd + import numpy as np -import nkululeko.glob_conf as glob_conf +import pandas as pd + from nkululeko.feat_extract import feinberg_praat -import ast +from nkululeko.feat_extract.featureset import Featureset +import nkululeko.glob_conf as glob_conf -class Praatset(Featureset): - """ - a feature extractor for the Praat software, based on - David R. Feinberg's Praat scripts for the parselmouth python interface. +class PraatSet(Featureset): + """A feature extractor for the Praat software. + + Based on David R. 
Feinberg's Praat scripts for the parselmouth python interface. https://osf.io/6dwr3/ """ diff --git a/nkululeko/feat_extract/feats_squim.py b/nkululeko/feat_extract/feats_squim.py index ac14f53a..4fc9bd02 100644 --- a/nkululeko/feat_extract/feats_squim.py +++ b/nkululeko/feat_extract/feats_squim.py @@ -1,36 +1,33 @@ -""" feats_squim.py -predict SQUIM ( SPEECH QUALITY AND INTELLIGIBILITY -MEASURES) features +"""Predict SQUIM ( SPEECH QUALITY AND INTELLIGIBILITY MEASURES) features. - - Wideband Perceptual Estimation of Speech Quality (PESQ) [2] - Short-Time Objective Intelligibility (STOI) [3] - Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) [4] - - -adapted from +Wideband Perceptual Estimation of Speech Quality (PESQ) [2]. + Short-Time Objective Intelligibility (STOI) [3]. + Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) [4]. +Adapted from from https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py -paper: https://arxiv.org/pdf/2304.01448.pdf - -needs +paper: https://arxiv.org/pdf/2304.01448.pdf. 
+Needs pip uninstall -y torch torchvision torchaudio pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu """ import os -from tqdm import tqdm + import pandas as pd import torch import torchaudio from torchaudio.pipelines import SQUIM_OBJECTIVE +from tqdm import tqdm + import audiofile + +from nkululeko.feat_extract.featureset import Featureset import nkululeko.glob_conf as glob_conf from nkululeko.utils.util import Util -from nkululeko.feat_extract.featureset import Featureset -class SQUIMSet(Featureset): +class SquimSet(Featureset): """Class to predict SQUIM features""" def __init__(self, name, data_df): diff --git a/nkululeko/feature_extractor.py b/nkululeko/feature_extractor.py index fd797588..a5f8f5bf 100644 --- a/nkululeko/feature_extractor.py +++ b/nkululeko/feature_extractor.py @@ -39,34 +39,51 @@ def extract(self): self.feats = pd.DataFrame() for feats_type in self.feats_types: store_name = f"{self.data_name}_{feats_type}" - feat_extractor = self._get_feat_extractor(store_name, feats_type) - feat_extractor.extract() - feat_extractor.filter() - self.feats = pd.concat([self.feats, feat_extractor.df], axis=1) + self.feat_extractor = self._get_feat_extractor(store_name, feats_type) + self.feat_extractor.extract() + self.feat_extractor.filter() + self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1) return self.feats def extract_sample(self, signal, sr): - return self.featExtractor.extract_sample(signal, sr) + return self.feat_extractor.extract_sample(signal, sr) def _get_feat_extractor(self, store_name, feats_type): feat_extractor_class = self._get_feat_extractor_class(feats_type) if feat_extractor_class is None: self.util.error(f"unknown feats_type: {feats_type}") - return feat_extractor_class(f"{store_name}_{self.feats_designation}", self.data_df) + return feat_extractor_class( + f"{store_name}_{self.feats_designation}", self.data_df + ) def _get_feat_extractor_class(self, feats_type): 
if feats_type == "os": from nkululeko.feat_extract.feats_opensmile import Opensmileset + return Opensmileset elif feats_type == "spectra": from nkululeko.feat_extract.feats_spectra import Spectraloader + return Spectraloader elif feats_type == "trill": from nkululeko.feat_extract.feats_trill import TRILLset + return TRILLset elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")): return self._get_feat_extractor_by_prefix(feats_type) - elif feats_type in ("audmodel", "auddim", "agender", "agender_agender", "snr", "mos", "squim", "clap", "praat", "mld", "import"): + elif feats_type in ( + "audmodel", + "auddim", + "agender", + "agender_agender", + "snr", + "mos", + "squim", + "clap", + "praat", + "mld", + "import", + ): return self._get_feat_extractor_by_name(feats_type) else: return None @@ -74,14 +91,14 @@ def _get_feat_extractor_class(self, feats_type): def _get_feat_extractor_by_prefix(self, feats_type): prefix, _, ext = feats_type.partition("_") from importlib import import_module - module = import_module( - f"nkululeko.feat_extract.feats_{prefix.lower()}") + + module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}") class_name = f"{prefix.capitalize()}{ext.capitalize()}set" return getattr(module, class_name) def _get_feat_extractor_by_name(self, feats_type): from importlib import import_module - module = import_module( - f"nkululeko.feat_extract.feats_{feats_type.lower()}") + + module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}") class_name = f"{feats_type.capitalize()}Set" return getattr(module, class_name) diff --git a/nkululeko/test_predictor.py b/nkululeko/test_predictor.py index 2c9d8fea..dc9a88f2 100644 --- a/nkululeko/test_predictor.py +++ b/nkululeko/test_predictor.py @@ -62,3 +62,4 @@ def predict_and_store(self): df = df.drop(columns=[target]) df = df.rename(columns={"class_label": target}) df.to_csv(self.name) + self.util.debug(f"results stored in {self.name}") diff --git a/tests/data_roots.ini 
b/tests/data_roots.ini index 4a618a05..b1cf39d5 100644 --- a/tests/data_roots.ini +++ b/tests/data_roots.ini @@ -4,6 +4,11 @@ emodb.split_strategy = specified emodb.test_tables = ['emotion.categories.test.gold_standard'] emodb.train_tables = ['emotion.categories.train.gold_standard'] emodb.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'} -polish = ./data/polish_emo -polish.mapping = {'anger':'angry', 'joy':'happy', 'sadness':'sad', 'neutral':'neutral'} -polish.test_size = 30 +crema-d = ./data/crema-d/crema-d/1.3.0/fe182b91/ +crema-d.split_strategy = specified +crema-d.colnames = {'sex':'gender'} +crema-d.files_table = ['files'] +crema-d.target_tables = ['emotion.categories.desired.test','emotion.categories.desired.train', 'emotion.categories.desired.dev'] +crema-d.test_tables = ['emotion.categories.desired.test'] +crema-d.train_tables = ['emotion.categories.desired.train', 'emotion.categories.desired.dev'] +crema-d.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'} diff --git a/tests/exp_agedb_os_mlp.ini b/tests/exp_agedb_os_mlp.ini index b073d530..e9eac397 100644 --- a/tests/exp_agedb_os_mlp.ini +++ b/tests/exp_agedb_os_mlp.ini @@ -3,7 +3,7 @@ root = ./tests/results/ name = exp_agedb type = regression runs = 1 -epochs = 25 +epochs = 50 save = True [DATA] databases = ['emodb'] @@ -23,7 +23,7 @@ layers = {'l1':1024, 'l2':128} drop = .4 loss = 1-ccc measure = ccc -save = True +patience = 5 [PLOT] best_model = True epoch_progression = True diff --git a/tests/exp_emodb_os_mlp.ini b/tests/exp_emodb_os_mlp.ini index 2168c745..aa6edf5a 100644 --- a/tests/exp_emodb_os_mlp.ini +++ b/tests/exp_emodb_os_mlp.ini @@ -1,8 +1,8 @@ [EXP] root = ./tests/results/ -name = exp_emodb_os_mlp +name = exp_emodb runs = 1 -epochs = 50 +epochs = 500 save = True [DATA] databases = ['emodb'] @@ -10,15 +10,9 @@ emodb = ./data/emodb/emodb emodb.split_strategy = specified emodb.test_tables = 
['emotion.categories.test.gold_standard'] emodb.train_tables = ['emotion.categories.train.gold_standard'] -labels = ['angry', 'happy', 'neutral', 'sad'] -emodb.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'} -emodb.limit = 350 target = emotion [FEATS] type = ['os'] -#type = ['audmodel'] -; type = ['wav2vec'] -; wav2vec.model = ../wav2vec2-embeddings/wav2vec2-large-robust-ft-swbd-300h/ scale = standard [MODEL] type = mlp diff --git a/tests/exp_emodb_os_xgb.ini b/tests/exp_emodb_os_xgb.ini index ddda1ebb..c2ac7053 100644 --- a/tests/exp_emodb_os_xgb.ini +++ b/tests/exp_emodb_os_xgb.ini @@ -7,15 +7,15 @@ save = True [DATA] databases = ['emodb'] emodb = ./data/emodb/emodb -#emodb.split_strategy = specified -#emodb.test_tables = ['emotion.categories.test.gold_standard'] -#emodb.train_tables = ['emotion.categories.train.gold_standard'] -#emodb.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'} -#labels = ['angry', 'happy', 'neutral', 'sad'] +emodb.split_strategy = specified +emodb.test_tables = ['emotion.categories.test.gold_standard'] +emodb.train_tables = ['emotion.categories.train.gold_standard'] +emodb.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'} +labels = ['angry', 'happy', 'neutral', 'sad'] target = emotion [FEATS] type = ['os'] store_format = csv scale = standard [MODEL] -type = xgb \ No newline at end of file +type = xgb diff --git a/tests/exp_emodb_os_xgb_test.ini b/tests/exp_emodb_os_xgb_test.ini index 7ff0576d..e0c86f7e 100644 --- a/tests/exp_emodb_os_xgb_test.ini +++ b/tests/exp_emodb_os_xgb_test.ini @@ -6,8 +6,7 @@ save = True databases = ['emodb'] root_folders = tests/data_roots.ini target = emotion -tests = ['polish'] -polish.split_strategy = speaker_split +tests = ['crema-d'] labels = ['angry', 'happy', 'neutral', 'sad'] no_reuse = True [FEATS] diff --git a/tests/exp_multidb.ini b/tests/exp_multidb.ini index 9f7d0e51..f681a3e6 100644 --- 
a/tests/exp_multidb.ini +++ b/tests/exp_multidb.ini @@ -1,9 +1,10 @@ [EXP] root = ./tests/results/multidb epochs = 100 -databases = ['emodb', 'polish'] +databases = ['emodb', 'crema-d'] [DATA] root_folders = ./tests/data_roots.ini +crema-d.limit_samples = 1000 target = emotion labels = ['neutral', 'happy', 'sad', 'angry'] [FEATS]