0.82.0

bagustris · Apr 23, 2024 · 2cc7377 · 2cc7377
1 parent 23e03de
commit 2cc7377
Show file tree

Hide file tree

Showing 6 changed files with 121 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 Changelog
 =========
 
+Version 0.82.0
+--------------
+* added nkuluflag module
+
 Version 0.81.7
 --------------
 * bugfixes

diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
+                    [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
 - [Overview](#overview)
   - [Confusion matrix](#confusion-matrix)
   - [Epoch progression](#epoch-progression)
@@ -159,6 +161,14 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
 * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
 * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz 
+* **nkululeko.nkuluflag**: a convenience module to override parameters of a base configuration on the command line.
+  * usage: `nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
+                    [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]`
+
+
+
+
+
 
 There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)

diff --git a/nkululeko/constants.py b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.81.7"
+VERSION="0.82.0"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/multidb.py b/nkululeko/multidb.py
@@ -12,18 +12,15 @@
 import pandas as pd
 import seaborn as sn
 
-import nkululeko.glob_conf as glob_conf
 from nkululeko.aug_train import doit as aug_train
-from nkululeko.experiment import Experiment
 from nkululeko.nkululeko import doit as nkulu
-from nkululeko.utils.util import Util
 
 
 def main(src_dir):
     parser = argparse.ArgumentParser(
-        description="Call the nkululeko MULTIDB framework.")
-    parser.add_argument("--config", default="exp.ini",
-                        help="The base configuration")
+        description="Call the nkululeko MULTIDB framework."
+    )
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -58,8 +55,7 @@ def main(src_dir):
                 dataset = datasets[i]
                 print(f"running {dataset}")
                 if extra_trains:
-                    extra_trains_1 = extra_trains.removeprefix(
-                        "[").removesuffix("]")
+                    extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
                     config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
                     extra_trains_2 = ast.literal_eval(extra_trains)
                     for extra_train in extra_trains_2:
@@ -72,8 +68,7 @@ def main(src_dir):
                 test = datasets[j]
                 print(f"running train: {train}, test: {test}")
                 if extra_trains:
-                    extra_trains_1 = extra_trains.removeprefix(
-                        "[").removesuffix("]")
+                    extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
                     config["DATA"][
                         "databases"
                     ] = f"['{train}', '{test}', {extra_trains_1}]"

diff --git a/nkululeko/nkuluflag.py b/nkululeko/nkuluflag.py
@@ -0,0 +1,130 @@
+import argparse
+import configparser
+import os
+import os.path
+import sys
+
+from nkululeko.nkululeko import doit as nkulu
+
+
+def _flatten(groups):
+    """Flatten the nested lists argparse builds for repeatable multi-value flags."""
+    # nargs="*" with action="append" yields a list of lists, or None if unused
+    return [value for group in (groups or []) for value in group]
+
+
+def _set_option(config, section, option, value):
+    """Set config[section][option], creating the section if the file lacks it."""
+    if not config.has_section(section):
+        config.add_section(section)
+    config[section][option] = value
+
+
+def do_it(src_dir):
+    """Run nkululeko with a base config overridden by command-line flags.
+
+    Reads the file given with --config, replaces every option that was passed
+    on the command line, writes the merged configuration to "tmp.ini" in the
+    current working directory and starts the experiment from that file.
+
+    Args:
+        src_dir: not used by this function; kept for the existing signature.
+
+    Returns:
+        The (result, last_epoch) pair returned by nkululeko's doit().
+
+    Raises:
+        SystemExit: if --config is missing or does not point to a file.
+    """
+    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
+    parser.add_argument("--config", help="The base configuration")
+    parser.add_argument("--data", help="The databases", nargs="*", action="append")
+    parser.add_argument(
+        "--label", nargs="*", help="The labels for the target", action="append"
+    )
+    parser.add_argument(
+        "--tuning_params", nargs="*", help="parameters to be tuned", action="append"
+    )
+    parser.add_argument(
+        "--layers",
+        nargs="*",
+        help="layer config for mlp, e.g. l1:128 ",
+        action="append",
+    )
+    # NOTE: --model and --feat carry defaults, so they always replace the values
+    # of the base configuration, even when not given on the command line.
+    parser.add_argument("--model", default="xgb", help="The model type")
+    parser.add_argument("--feat", default="['os']", help="The feature type")
+    parser.add_argument("--set", help="The opensmile set")
+    # NOTE(review): --with_os is parsed but never written to the configuration.
+    parser.add_argument("--with_os", help="To add os features")
+    parser.add_argument("--target", help="The target designation")
+    parser.add_argument("--epochs", help="The number of epochs")
+    parser.add_argument("--runs", help="The number of runs")
+    parser.add_argument("--learning_rate", help="The learning rate")
+    parser.add_argument("--drop", help="The dropout rate [0:1]")
+
+    args = parser.parse_args()
+
+    if args.config is None:
+        print("ERROR: need config file")
+        sys.exit(-1)
+    config_file = args.config
+    # ConfigParser.read() silently skips missing files, so stop here instead
+    # of silently running with an empty base configuration.
+    if not os.path.isfile(config_file):
+        print(f"ERROR: no such file {config_file}")
+        sys.exit(-1)
+
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    # list-valued flags: keep every value of every occurrence
+    databases = _flatten(args.data)
+    if databases:
+        print(f"got databases: {databases}")
+        _set_option(config, "DATA", "databases", str(databases))
+    labels = _flatten(args.label)
+    if labels:
+        print(f"got labels: {labels}")
+        _set_option(config, "DATA", "labels", str(labels))
+    tuning_params = _flatten(args.tuning_params)
+    if tuning_params:
+        _set_option(config, "MODEL", "tuning_params", str(tuning_params))
+    layers = _flatten(args.layers)
+    if layers:
+        # only the first value is written, as a plain string
+        _set_option(config, "MODEL", "layers", layers[0])
+
+    # scalar flags as (section, option, value); None means "not given"
+    overrides = [
+        ("DATA", "target", args.target),
+        ("EXP", "epochs", args.epochs),
+        ("EXP", "runs", args.runs),
+        ("MODEL", "learning_rate", args.learning_rate),
+        ("MODEL", "drop", args.drop),
+        ("MODEL", "type", args.model),
+        ("FEATS", "set", args.set),
+    ]
+    for section, option, value in overrides:
+        if value is not None:
+            _set_option(config, section, option, value)
+
+    if args.feat is not None:
+        feat = args.feat.strip()
+        # the default already is a list literal; wrap only bare names like "os"
+        if not feat.startswith("["):
+            feat = f"['{feat}']"
+        _set_option(config, "FEATS", "type", feat)
+
+    tmp_config = "tmp.ini"
+    with open(tmp_config, "w") as tmp_file:
+        config.write(tmp_file)
+
+    result, last_epoch = nkulu(tmp_config)
+    return result, last_epoch
+
+
+if __name__ == "__main__":
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    do_it(cwd)
diff --git a/nkululeko/nkululeko.py b/nkululeko/nkululeko.py
@@ -1,12 +1,14 @@
 # nkululeko.py
 # Entry script to do a Nkululeko experiment
-import numpy as np
-import os.path
-import configparser
 import argparse
+import configparser
+import os.path
+
+import numpy as np
+
+from nkululeko.constants import VERSION
 import nkululeko.experiment as exp
 from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 
 
 def doit(config_file):