Merge branch 'felixbur:main' into master

bagustris · Apr 24, 2024 · 9c50c37 · 9c50c37
2 parents 166a98b + 2cc7377
commit 9c50c37
Show file tree

Hide file tree

Showing 23 changed files with 337 additions and 207 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,15 @@
 Changelog
 =========
 
+Version 0.82.0
+--------------
+* added nkuluflag module
+
+Version 0.81.7
+--------------
+* bugfixes
+* added whisper feature extractor
+
 Version 0.81.6
 --------------
 * updated documentation

diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
+                    [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
 - [Overview](#overview)
   - [Confusion matrix](#confusion-matrix)
   - [Epoch progression](#epoch-progression)
@@ -159,6 +161,14 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
 * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
 * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz 
+* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command line.
+  * usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
+                    [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
+
+
+
+
+
 
 There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)

diff --git a/ini_file.md b/ini_file.md
@@ -193,6 +193,8 @@
       * "hubert-base-ls960", "hubert-large-ll60k", "hubert-large-ls960-ft", "hubert-xlarge-ll60k", "hubert-xlarge-ls960-ft"
     * **WavLM**:
       * "wavlm-base", "wavlm-base-plus", "wavlm-large"
+    * **Whisper**: [whisper models](https://huggingface.co/models?other=whisper)
+      * "whisper-base", "whisper-large", "whisper-medium", "whisper-tiny"
     * **audmodel**: [audEERING emotion model embeddings](https://arxiv.org/abs/2203.07378), wav2vec2.0 model finetuned on [MSPPodcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) emotions, embeddings
       * **aud.model** = ./audmodel/ (*path to the audEERING model folder*)
     * **auddim**: [audEERING emotion model dimensions](https://arxiv.org/abs/2203.07378), wav2vec2.0 model finetuned on [MSPPodcast](https://ecs.utdallas.edu/research/researchlabs/msp-lab/MSP-Podcast.html) arousal, dominance, valence

diff --git a/meta/demos/multiple_exeriments/do_experiments.py b/meta/demos/multiple_exeriments/do_experiments.py
@@ -1,48 +1,35 @@
 import os
 
-src_path = 'demos/multiple_exeriments/'
 
 classifiers = [
-    {'--model': 'mlp',
-    '--layers': '\"{\'l1\':64,\'l2\':16}\"'},
-    {'--model': 'mlp',
-    '--layers': '\"{\'l1\':64,\'l2\':16}\"',
-    '--learning_rate': '.1',},
-    {'--model': 'mlp',
-    '--layers': '\"{\'l1\':64,\'l2\':16}\"',
-    '--learning_rate': '.0001',
-    '--drop': '.3',
+    {"--model": "mlp", "--layers": "\"{'l1':64,'l2':16}\"", "--epochs": 100},
+    {
+        "--model": "mlp",
+        "--layers": "\"{'l1':128,'l2':64,'l3':16}\"",
+        "--learning_rate": ".01",
+        "--drop": ".3",
+        "--epochs": 100,
     },
-    {'--model': 'xgb',
-    '--epochs':1},
-    {'--model': 'svm',
-    '--epochs':1},
+    {"--model": "xgb", "--epochs": 1},
+    {"--model": "svm", "--epochs": 1},
 ]
 
 features = [
-    {'--feat': 'os'},
-    # {'--feat': 'os', 
+    {"--feat": "os"},
+    # {'--feat': 'os',
     # '--set': 'ComParE_2016',
     # },
-    # {'--feat': 'mld'},
-    # {'--feat': 'mld',
-    # '--with_os': 'True',
-    # },
-    # {'--feat': 'xbow'},
-    # {'--feat': 'xbow',
-    # '--with_os': 'True',
-    # },
-    # {'--feat': 'trill'},
-    # {'--feat': 'wav2vec'},
+    {"--feat": "audmodel"},
 ]
 
 
 for c in classifiers:
     for f in features:
-        cmd = f'python {src_path}parse_nkulu.py '
+        cmd = "python -m nkululeko.nkuluflag --config exp.ini "
         for item in c:
-            cmd += f'{item} {c[item]} '
+            cmd += f"{item} {c[item]} "
         for item in f:
-            cmd += f'{item} {f[item]} '
+            cmd += f"{item} {f[item]} "
         print(cmd)
-        os.system(cmd)
+        os.system(cmd)
+        # print(f"results: {result}, {last_epoch}")
diff --git a/meta/demos/multiple_exeriments/exp.ini b/meta/demos/multiple_exeriments/exp.ini
@@ -1,23 +1,22 @@
 [EXP]
 root = ./
-name = exp_test
+name = results
 runs = 1
 epochs = 50
 [DATA]
-root_folders = data_roots.ini
 databases = ['emodb']
+emodb = ../../../data/emodb/emodb
+emodb.split_strategy = specified
+emodb.train_tables = ['emotion.categories.train.gold_standard']
+emodb.test_tables = ['emotion.categories.test.gold_standard']
 target = emotion
-labels = ['anger', 'boredom', 'disgust', 'fear', 'happiness', 'neutral', 'sadness']
+labels = ['anger', 'happiness']
 [FEATS]
-wav2vec.model = xxx/wav2vec2-large-robust-ft-swbd-300h
-xbow.model = xxx/openXBOW/
-trill.model = xxx/trill_model
-mld.model = xxx/mld/src
-scale = standard
 [MODEL]
 C_val = .001
 #drop = .5
 learning_rate = 0.0001
 store = True
+patience = 5
 [PLOT]
-best_model = True
+best_model = True
diff --git a/meta/demos/multiple_exeriments/parse_nkulu.py b/meta/demos/multiple_exeriments/parse_nkulu.py
diff --git a/nkululeko/constants.py b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.81.6"
+VERSION="0.82.0"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/feat_extract/feats_import.py b/nkululeko/feat_extract/feats_import.py
@@ -11,8 +11,8 @@
 class ImportSet(Featureset):
     """Class to import features that have been compiled elsewhere"""
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
 
     def extract(self):
         """Import the features."""

diff --git a/nkululeko/feat_extract/feats_mos.py b/nkululeko/feat_extract/feats_mos.py
@@ -27,9 +27,9 @@
 class MosSet(Featureset):
     """Class to predict MOS (mean opinion score)"""
 
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False
 

diff --git a/nkululeko/feat_extract/feats_opensmile.py b/nkululeko/feat_extract/feats_opensmile.py
@@ -15,24 +15,14 @@ def __init__(self, name, data_df, feats_type=None, config_file=None):
             self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
             # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature set: {self.featset}"
-            )
+            self.util.error(f"something is wrong with feature set: {self.featset}")
         self.featlevel = self.util.config_val("FEATS", "level", "functionals")
         try:
-            self.featlevel = self.featlevel.replace(
-                "lld", "LowLevelDescriptors"
-            )
-            self.featlevel = self.featlevel.replace(
-                "functionals", "Functionals"
-            )
-            self.feature_level = eval(
-                f"opensmile.FeatureLevel.{self.featlevel}"
-            )
+            self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
+            self.featlevel = self.featlevel.replace("functionals", "Functionals")
+            self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature level: {self.featlevel}"
-            )
+            self.util.error(f"something is wrong with feature level: {self.featlevel}")
 
     def extract(self):
         """Extract the features based on the initialized dataset or re-open them when found on disk."""
@@ -44,9 +34,7 @@ def extract(self):
         )
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or not os.path.isfile(storage) or no_reuse:
-            self.util.debug(
-                "extracting openSmile features, this might take a while..."
-            )
+            self.util.debug("extracting openSmile features, this might take a while...")
             smile = opensmile.Smile(
                 feature_set=self.feature_set,
                 feature_level=self.feature_level,
@@ -85,9 +73,7 @@ def filter(self):
             selected_features = ast.literal_eval(
                 glob_conf.config["FEATS"]["os.features"]
             )
-            self.util.debug(
-                f"selecting features from opensmile: {selected_features}"
-            )
+            self.util.debug(f"selecting features from opensmile: {selected_features}")
             sel_feats_df = pd.DataFrame()
             hit = False
             for feat in selected_features:

diff --git a/nkululeko/feat_extract/feats_spectra.py b/nkululeko/feat_extract/feats_spectra.py
@@ -4,6 +4,7 @@
 Inspired by code from Su Lei
 
 """
+
 import os
 import torchaudio
 import torchaudio.transforms as T
@@ -23,9 +24,9 @@
 
 
 class Spectraloader(Featureset):
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feat_type):
         """Constructor setting the name"""
-        Featureset.__init__(self, name, data_df)
+        super().__init__(name, data_df, feat_type)
         self.sampling_rate = SAMPLING_RATE
         self.num_bands = int(self.util.config_val("FEATS", "fft_nbands", "64"))
         self.win_dur = int(self.util.config_val("FEATS", "fft_win_dur", "25"))

diff --git a/nkululeko/feat_extract/feats_squim.py b/nkululeko/feat_extract/feats_squim.py
@@ -30,9 +30,9 @@
 class SquimSet(Featureset):
     """Class to predict SQUIM features"""
 
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False