Skip to content

Commit

Permalink
Merge branch 'felixbur:main' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris authored Apr 23, 2024
2 parents 010ac7d + 269e91a commit 764e487
Show file tree
Hide file tree
Showing 23 changed files with 179 additions and 75 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Changelog
=========

Version 0.81.6
--------------
* updated documentation
* updated crema-d
* updated tests

Version 0.81.5
--------------
* added sex=gender for speaker mappings

Version 0.81.4
--------------
* fixed bug in demo module
Expand Down
18 changes: 18 additions & 0 deletions data/crema-d/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
This folder contains the files needed to import the Crowd-sourced Emotional Multimodal Actors Dataset (CREMA-D) into nkululeko.

Labels are: 'angry', 'disgust', 'fear', 'happy', 'neutral', 'sad'.

Based on the [audb version](https://github.com/audeering/crema-d)

Load the database with python:
```bash
$ python load_db.py
```
Run this command from **inside** the crema-d folder, because the script downloads the data into the current working directory.

Then, as a test, you can run the following from the nkululeko root directory:

```bash
python -m nkululeko.nkululeko --config data/crema-d/test_age.ini
python -m nkululeko.nkululeko --config data/crema-d/test_emotion.ini
```
11 changes: 11 additions & 0 deletions data/crema-d/load_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os

import audb


# set download directory to current
cwd = os.getcwd()
audb.config.CACHE_ROOT = cwd

# load the latest version of the data
db = audb.load("crema-d", format="wav", sampling_rate=16000, mixdown=True)
22 changes: 22 additions & 0 deletions data/crema-d/test_age.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[EXP]
root = ./data/crema-d/
name = results_age
type = regression
[DATA]
databases = ['data']
data = ./data/crema-d/crema-d/1.3.0/fe182b91/
data.split_strategy = specified
data.colnames = {'sex':'gender'}
data.files_table = ['files']
data.test_tables = ['age.test']
data.train_tables = ['age.dev', 'age.train']
target = age
labels = ['20ies','30ies', '40ies', '50ies', '60ies']
bins = [-100000, 30, 40, 50, 60, 100000]
[FEATS]
type = ['os']
scale = standard
[MODEL]
type = svr
C_val = 10
measure = ccc
20 changes: 20 additions & 0 deletions data/crema-d/test_emotion.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[EXP]
root = ./data/crema-d/
name = results_emotion
[DATA]
databases = ['data']
data = ./data/crema-d/crema-d/1.3.0/fe182b91/
data.split_strategy = specified
data.colnames = {'sex':'gender'}
data.files_table = ['files']
data.target_tables = ['emotion.categories.desired.test','emotion.categories.desired.train', 'emotion.categories.desired.dev']
data.test_tables = ['emotion.categories.desired.test']
data.train_tables = ['emotion.categories.desired.train', 'emotion.categories.desired.dev']
target = emotion
labels = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness']
[FEATS]
type = ['os']
scale = standard
[MODEL]
type = svm
C_val = 10
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.81.4"
VERSION="0.81.6"
SAMPLING_RATE = 16000
11 changes: 9 additions & 2 deletions nkululeko/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def _check_cols(self, df):
if rename_cols:
col_dict = ast.literal_eval(rename_cols)
df = df.rename(columns=col_dict)
self.util.debug(f"renamed data columns: {col_dict}")
return df

def _report_load(self):
Expand Down Expand Up @@ -281,13 +282,19 @@ def _get_df_for_lists(self, db, df_files):
# try to get the age values
df_local["age"] = source_df["age"].astype(int)
got_age = True
except (KeyError, ValueError, audformat.errors.BadKeyError) as e:
except (KeyError, ValueError, audformat.errors.BadKeyError):
pass
try:
# also it might be possible that the sex is part of the speaker description
df_local["gender"] = db[table]["speaker"].get(map="gender")
got_gender = True
except (ValueError, audformat.errors.BadKeyError) as e:
except (ValueError, audformat.errors.BadKeyError):
pass
try:
# also it might be possible that the sex is part of the speaker description
df_local["gender"] = db[table]["speaker"].get(map="sex")
got_gender = True
except (ValueError, audformat.errors.BadKeyError):
pass
try:
# also it might be possible that the age is part of the speaker description
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ def save(self, filename):
pickle.dump(self.__dict__, f)
f.close()
except TypeError:
self.feature_extractor.featExtractor.model = None
self.feature_extractor.feat_extractor.model = None
f = open(filename, "wb")
pickle.dump(self.__dict__, f)
f.close()
Expand Down
5 changes: 1 addition & 4 deletions nkululeko/feat_extract/feats_agender.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ def _load_model(self):
audeer.extract_archive(archive_path, model_root)
device = self.util.config_val("MODEL", "device", "cpu")
self.model = audonnx.load(model_root, device=device)
pytorch_total_params = sum(p.numel() for p in self.model.parameters())
self.util.debug(
f"initialized agender model with {pytorch_total_params} parameters in total"
)
self.util.debug(f"initialized agender model")
self.model_loaded = True

def extract(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
import nkululeko.glob_conf as glob_conf


class AudModelDimSet(Featureset):
"""
Emotional dimensions from the wav2vec2. based model finetuned on MSPPodcast emotions, described in the paper
class AuddimSet(Featureset):
"""Emotional dimensions from the wav2vec2 model finetuned on MSPPodcast emotions.
Described in the paper
"Dawn of the transformer era in speech emotion recognition: closing the valence gap"
https://arxiv.org/abs/2203.07378
https://arxiv.org/abs/2203.07378.
"""

def __init__(self, name, data_df):
Expand Down
9 changes: 5 additions & 4 deletions nkululeko/feat_extract/feats_audmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
from nkululeko.feat_extract.featureset import Featureset


class AudModelSet(Featureset):
"""
Embeddings from the wav2vec2. based model finetuned on MSPPodcast emotions, described in the paper
class AudmodelSet(Featureset):
"""Embeddings from the wav2vec2 based model finetuned on MSPPodcast emotions.
Described in the paper:
"Dawn of the transformer era in speech emotion recognition: closing the valence gap"
https://arxiv.org/abs/2203.07378
https://arxiv.org/abs/2203.07378.
"""

def __init__(self, name, data_df):
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/feat_extract/feats_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from nkululeko.feat_extract.featureset import Featureset


class Importset(Featureset):
class ImportSet(Featureset):
"""Class to import features that have been compiled elsewhere"""

def __init__(self, name, data_df):
Expand Down
3 changes: 2 additions & 1 deletion nkululeko/feat_extract/feats_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
"""

import os
import pandas as pd
from tqdm import tqdm
Expand All @@ -23,7 +24,7 @@
from nkululeko.feat_extract.featureset import Featureset


class MOSSet(Featureset):
class MosSet(Featureset):
"""Class to predict MOS (mean opinion score)"""

def __init__(self, name, data_df):
Expand Down
18 changes: 10 additions & 8 deletions nkululeko/feat_extract/feats_praat.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# feats_praat.py
from nkululeko.feat_extract.featureset import Featureset
import ast
import os
import pandas as pd

import numpy as np
import nkululeko.glob_conf as glob_conf
import pandas as pd

from nkululeko.feat_extract import feinberg_praat
import ast
from nkululeko.feat_extract.featureset import Featureset
import nkululeko.glob_conf as glob_conf


class Praatset(Featureset):
"""
a feature extractor for the Praat software, based on
David R. Feinberg's Praat scripts for the parselmouth python interface.
class PraatSet(Featureset):
"""A feature extractor for the Praat software.
Based on David R. Feinberg's Praat scripts for the parselmouth python interface.
https://osf.io/6dwr3/
"""
Expand Down
29 changes: 13 additions & 16 deletions nkululeko/feat_extract/feats_squim.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,33 @@
""" feats_squim.py
predict SQUIM ( SPEECH QUALITY AND INTELLIGIBILITY
MEASURES) features
"""Predict SQUIM ( SPEECH QUALITY AND INTELLIGIBILITY MEASURES) features.
Wideband Perceptual Estimation of Speech Quality (PESQ) [2]
Short-Time Objective Intelligibility (STOI) [3]
Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) [4]
adapted from
Wideband Perceptual Estimation of Speech Quality (PESQ) [2].
Short-Time Objective Intelligibility (STOI) [3].
Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) [4].
Adapted from
https://pytorch.org/audio/main/tutorials/squim_tutorial.html#sphx-glr-tutorials-squim-tutorial-py
paper: https://arxiv.org/pdf/2304.01448.pdf
needs
paper: https://arxiv.org/pdf/2304.01448.pdf.
Needs
pip uninstall -y torch torchvision torchaudio
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
"""

import os
from tqdm import tqdm

import pandas as pd
import torch
import torchaudio
from torchaudio.pipelines import SQUIM_OBJECTIVE
from tqdm import tqdm

import audiofile

from nkululeko.feat_extract.featureset import Featureset
import nkululeko.glob_conf as glob_conf
from nkululeko.utils.util import Util
from nkululeko.feat_extract.featureset import Featureset


class SQUIMSet(Featureset):
class SquimSet(Featureset):
"""Class to predict SQUIM features"""

def __init__(self, name, data_df):
Expand Down
39 changes: 28 additions & 11 deletions nkululeko/feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,49 +39,66 @@ def extract(self):
self.feats = pd.DataFrame()
for feats_type in self.feats_types:
store_name = f"{self.data_name}_{feats_type}"
feat_extractor = self._get_feat_extractor(store_name, feats_type)
feat_extractor.extract()
feat_extractor.filter()
self.feats = pd.concat([self.feats, feat_extractor.df], axis=1)
self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
self.feat_extractor.extract()
self.feat_extractor.filter()
self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
return self.feats

def extract_sample(self, signal, sr):
return self.featExtractor.extract_sample(signal, sr)
return self.feat_extractor.extract_sample(signal, sr)

def _get_feat_extractor(self, store_name, feats_type):
feat_extractor_class = self._get_feat_extractor_class(feats_type)
if feat_extractor_class is None:
self.util.error(f"unknown feats_type: {feats_type}")
return feat_extractor_class(f"{store_name}_{self.feats_designation}", self.data_df)
return feat_extractor_class(
f"{store_name}_{self.feats_designation}", self.data_df
)

def _get_feat_extractor_class(self, feats_type):
if feats_type == "os":
from nkululeko.feat_extract.feats_opensmile import Opensmileset

return Opensmileset
elif feats_type == "spectra":
from nkululeko.feat_extract.feats_spectra import Spectraloader

return Spectraloader
elif feats_type == "trill":
from nkululeko.feat_extract.feats_trill import TRILLset

return TRILLset
elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")):
return self._get_feat_extractor_by_prefix(feats_type)
elif feats_type in ("audmodel", "auddim", "agender", "agender_agender", "snr", "mos", "squim", "clap", "praat", "mld", "import"):
elif feats_type in (
"audmodel",
"auddim",
"agender",
"agender_agender",
"snr",
"mos",
"squim",
"clap",
"praat",
"mld",
"import",
):
return self._get_feat_extractor_by_name(feats_type)
else:
return None

def _get_feat_extractor_by_prefix(self, feats_type):
prefix, _, ext = feats_type.partition("_")
from importlib import import_module
module = import_module(
f"nkululeko.feat_extract.feats_{prefix.lower()}")

module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
class_name = f"{prefix.capitalize()}{ext.capitalize()}set"
return getattr(module, class_name)

def _get_feat_extractor_by_name(self, feats_type):
from importlib import import_module
module = import_module(
f"nkululeko.feat_extract.feats_{feats_type.lower()}")

module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
class_name = f"{feats_type.capitalize()}Set"
return getattr(module, class_name)
1 change: 1 addition & 0 deletions nkululeko/test_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ def predict_and_store(self):
df = df.drop(columns=[target])
df = df.rename(columns={"class_label": target})
df.to_csv(self.name)
self.util.debug(f"results stored in {self.name}")
11 changes: 8 additions & 3 deletions tests/data_roots.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ emodb.split_strategy = specified
emodb.test_tables = ['emotion.categories.test.gold_standard']
emodb.train_tables = ['emotion.categories.train.gold_standard']
emodb.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'}
polish = ./data/polish_emo
polish.mapping = {'anger':'angry', 'joy':'happy', 'sadness':'sad', 'neutral':'neutral'}
polish.test_size = 30
crema-d = ./data/crema-d/crema-d/1.3.0/fe182b91/
crema-d.split_strategy = specified
crema-d.colnames = {'sex':'gender'}
crema-d.files_table = ['files']
crema-d.target_tables = ['emotion.categories.desired.test','emotion.categories.desired.train', 'emotion.categories.desired.dev']
crema-d.test_tables = ['emotion.categories.desired.test']
crema-d.train_tables = ['emotion.categories.desired.train', 'emotion.categories.desired.dev']
crema-d.mapping = {'anger':'angry', 'happiness':'happy', 'sadness':'sad', 'neutral':'neutral'}
Loading

0 comments on commit 764e487

Please sign in to comment.