Merge pull request felixbur#120 from bagustris/master

add jnv corpus and new test file (run_test2.sh)
bagustris · May 13, 2024 · 8c4cc4d · 8c4cc4d
2 parents 99792ae + 4efcbff
commit 8c4cc4d
Show file tree

Hide file tree

Showing 5 changed files with 231 additions and 2 deletions.
diff --git a/data/emodb/README.md b/data/emodb/README.md
@@ -8,7 +8,7 @@ wget https://zenodo.org/record/7447302/files/emodb.zip
 # Unzip
 unzip emodb.zip
 # change to Nkululeko parent directory
-cd ..
+cd ../..
 # run the nkululeko experiment
 python -m nkululeko.nkululeko --config tests/exp_emodb_os_xgb.ini
 ```

diff --git a/data/jnv/README.md b/data/jnv/README.md
@@ -0,0 +1,13 @@
+# JNV Corpus  
+
+Pre-processing the JNV Corpus for Nkululeko (CSV format).
+
+```bash
+wget https://ss-takashi.sakura.ne.jp/corpus/jnv/jnv_corpus_ver2.zip
+unzip jnv_corpus_ver2.zip
+python3 process_database.py
+cd ../..
+# the following will resample and replace JNV forpus to 16k 
+python3 -m nkululeko.resample --config data/jnv/exp.ini
+python3 -m nkululeko.nkululeko --config data/jnv/exp.ini
+```
diff --git a/data/jnv/process_database.py b/data/jnv/process_database.py
@@ -0,0 +1,50 @@
+# process_database.py: pre-processing script for JNV database
+
+import argparse
+import os
+
+import numpy as np
+import pandas as pd
+
+
+def read_audio_files(data_dir):
+    """Collect all WAV files below ``data_dir`` together with their emotion.
+
+    The emotion label is the second underscore-separated field of the file
+    name.
+
+    Args:
+        data_dir: Directory that is searched recursively for ``.wav`` files.
+
+    Returns:
+        A DataFrame with the columns ``file`` (path to the audio file) and
+        ``emotion``. Both columns are present even when no file is found.
+    """
+    data = []
+    for root, dirs, files in os.walk(data_dir):
+        # os.walk yields entries in arbitrary (filesystem) order; sorting
+        # keeps the row order, and with it the seeded split, reproducible
+        dirs.sort()
+        for file in sorted(files):
+            if not file.endswith(".wav"):
+                continue
+            fields = file.split("_")
+            if len(fields) < 2:
+                # no emotion field in the name: skip it rather than raise
+                # an IndexError
+                continue
+            data.append({"file": os.path.join(root, file),
+                         "emotion": fields[1]})
+
+    return pd.DataFrame(data, columns=["file", "emotion"])
+
+
+def main(args):
+    """Shuffle the corpus and write train/dev/test CSV files (80/10/10).
+
+    Args:
+        args: Parsed command-line arguments providing ``data_dir`` (where the
+            audio files are) and ``output_dir`` (where the CSV files go).
+
+    Writes ``jnv_train.csv``, ``jnv_dev.csv`` and ``jnv_test.csv`` into
+    ``output_dir``, creating that directory first if it does not exist.
+    """
+    data_dir = args.data_dir
+    output_dir = args.output_dir
+
+    df = read_audio_files(data_dir)
+
+    # Fixed seed so that the shuffle is the same on every run
+    shuffled = df.sample(frac=1, random_state=42)
+    train_end = int(.8 * len(shuffled))
+    dev_end = int(.9 * len(shuffled))
+
+    # Positional slicing gives the same partitions as np.split() without
+    # relying on NumPy's handling of DataFrames, which is deprecated
+    train_df = shuffled.iloc[:train_end]
+    dev_df = shuffled.iloc[train_end:dev_end]
+    test_df = shuffled.iloc[dev_end:]
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    train_df.to_csv(os.path.join(output_dir, "jnv_train.csv"), index=False)
+    dev_df.to_csv(os.path.join(output_dir, "jnv_dev.csv"), index=False)
+    test_df.to_csv(os.path.join(output_dir, "jnv_test.csv"), index=False)
+
+
+if __name__ == "__main__":
+    # Command-line options as (flag, default value, help text); both are
+    # optional strings
+    option_specs = (
+        ("--data_dir", "jnv_corpus_ver2/JNV/",
+         "Directory containing audio files"),
+        ("--output_dir", "./", "Output dir for CSV files"),
+    )
+
+    parser = argparse.ArgumentParser()
+    for flag, default, help_text in option_specs:
+        parser.add_argument(flag, type=str, default=default, help=help_text)
+    args = parser.parse_args()
+
+    main(args)
diff --git a/run_test2.sh b/run_test2.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+# directory for test files
+# test_dir="tests"
+
+# Display help message
+# Outputs: the usage text on stdout
+function Help {
+    # Take the name from $0 so that the usage text matches the file that is
+    # actually being run
+    local script_name="${0##*/}"
+    echo "Usage: $script_name [options]"
+    echo "Example: ./$script_name nkululeko"
+    echo "Options:"
+    echo "  nkululeko: test basic nkululeko"
+    echo "  augment: test augmentation"
+    echo "  predict: test prediction"
+    echo "  demo: test demo"
+    echo "  test: test test module"
+    echo "  multidb: test multidb"
+    echo "  explore: test explore module (must be run last)"
+    echo "  all: test all modules"
+    echo "  -spotlight: test all modules except spotlight (useful in SSH)"
+    echo "  --help: display this help message"
+}
+
+# Start from an empty results directory when the first argument is
+# "nkululeko", "all" or "-spotlight". The three tests are joined with "||":
+# separated by ";" only the last test would decide the condition.
+# TODO: move root to /tmp so no need to do this
+if [ "$1" == "nkululeko" ] || [ "$1" == "all" ] || [ "$1" == "-spotlight" ]; then
+    echo "Removing (old) results directory and create if not exist"
+    rm -rf tests/results/*
+    mkdir -p tests/results
+fi
+
+# Run a test and check for errors
+# Arguments: the command to run, followed by its arguments
+# Outputs:   an error message on stdout if the command fails
+# Returns:   0 if the command succeeded, 1 otherwise
+function RunTest {
+    if ! "$@"; then
+        # "$*" puts the whole command into the message as a single word
+        echo "Error: Test failed - $*"
+        return 1   # report the failure to the caller (does not exit the script)
+    fi
+}
+
+# INI files to run for each module. The test loop looks a list up by name as
+# "<module>_ini_files", so every array name must match its module name.
+
+# test basic nkululeko
+nkululeko_ini_files=(
+    exp_emodb_os_praat_xgb.ini
+    exp_emodb_featimport_xgb.ini
+    exp_emodb_cnn.ini
+    exp_emodb_balancing.ini
+    exp_emodb_split.ini
+    exp_ravdess_os_xgb.ini
+    exp_agedb_class_os_xgb.ini
+    exp_emodb_hubert_xgb.ini
+    exp_emodb_wavlm_xgb.ini
+    exp_emodb_whisper_xgb.ini
+    emodb_demo.ini
+    exp_emodb_os_xgb_test.ini
+    exp_emodb_trill_test.ini
+    exp_emodb_wav2vec2_test.ini
+    exp_emodb_os_xgb.ini
+    exp_emodb_os_svm.ini
+    exp_emodb_os_knn.ini
+    exp_emodb_os_mlp.ini
+    exp_agedb_os_xgr.ini
+    exp_agedb_os_mlp.ini
+)
+
+# test augmentation
+augment_ini_files=(
+    exp_emodb_augment_os_xgb.ini
+    exp_emodb-aug_os_xgb.ini
+    exp_emodb_random_splice_os_xgb.ini
+    exp_emodb_rs_os_xgb.ini
+    emodb_aug_train.ini
+)
+
+# test prediction
+predict_ini_files=(
+    exp_emodb_predict.ini
+) 
+# test demo (the test loop adds the --list argument for this module)
+demo_ini_files=(
+    exp_emodb_os_xgb.ini
+    exp_emodb_os_svm.ini
+    exp_emodb_os_knn.ini
+    exp_emodb_os_mlp.ini
+    exp_agedb_os_xgr.ini
+    exp_agedb_os_mlp.ini
+)
+
+# test test module
+test_ini_files=(
+    exp_emodb_os_xgb_test.ini
+    exp_emodb_trill_test.ini
+    exp_emodb_wav2vec2_test.ini
+)
+
+# test multidb
+multidb_ini_files=(
+    exp_multidb.ini
+)
+
+# test explore module
+# The spotlight INI file must stay the last entry: the "-spotlight" option
+# excludes it by removing the last element of this array.
+explore_ini_files=(
+    exp_emodb_explore_data.ini
+    exp_emodb_explore_featimportance.ini 
+    exp_emodb_explore_scatter.ini
+    exp_emodb_explore_features.ini
+    exp_agedb_explore_data.ini
+    exp_explore.ini # test spotlight
+)
+
+
+# Show the usage text and stop when no module is given or help is requested.
+# Without the exit the script would carry on and treat "--help" as the name
+# of a module to test.
+if [ $# -eq 0 ] || [ "$1" == "--help" ]; then
+    Help
+    exit 0
+fi
+
+# Remember when the run started (seconds since the epoch)
+start_time=$(date +%s)
+
+# Select the modules to run: every module for "all" and "-spotlight",
+# otherwise the modules named on the command line
+if [ "$1" == "all" ]; then
+    modules=(nkululeko augment predict demo test multidb explore)
+elif [ "$1" == "-spotlight" ]; then
+    modules=(nkululeko augment predict demo test multidb explore)
+    # Exclude INI file for spotlight (the last entry). The subscript is quoted
+    # so that the shell cannot expand it as a glob pattern against file names.
+    unset 'explore_ini_files[-1]'
+else
+    modules=("$@")
+fi
+
+# Run every INI file of every selected module and keep track of the results
+success_count=0
+failed_count=0
+failed_modules=()
+for module in "${modules[@]}"
+do
+    # A module is only known if an array named <module>_ini_files exists;
+    # anything else (a typo, an option such as "--help") is skipped
+    if ! declare -p -- "${module}_ini_files" >/dev/null 2>&1; then
+        echo "Warning: unknown module '$module' - skipped"
+        continue
+    fi
+
+    # Run the test over the selected modules; the array is read through
+    # indirect expansion of its name
+    ini_files="${module}_ini_files[@]"
+    for ini_file in "${!ini_files}"
+    do
+        test_cmd=(python3 -m "nkululeko.$module" --config "tests/$ini_file")
+        # if module is "demo" add "--list" argument
+        if [ "$module" == "demo" ]; then
+            test_cmd+=(--list "data/test/samples.csv")
+        fi
+
+        if RunTest "${test_cmd[@]}"; then
+            success_count=$((success_count + 1))
+        else
+            failed_count=$((failed_count + 1))
+            failed_modules+=("$module with $ini_file")
+        fi
+    done
+done
+
+# Report the totals
+printf 'Total tests passed: %s\n' "$success_count"
+printf 'Total tests failed: %s\n' "$failed_count"
+
+# List every failed module/INI file combination, one per line
+if (( ${#failed_modules[@]} > 0 )); then
+    printf '%s\n' "Failed modules and INI files:"
+    printf '%s\n' "${failed_modules[@]}"
+fi
+
+# Elapsed time of the whole run in seconds
+end_time=$(date +%s)
+total_time=$((end_time - start_time))
+
+printf 'Total time taken: %s seconds\n' "$total_time"
diff --git a/tests/exp_explore.ini b/tests/exp_explore.ini
@@ -1,4 +1,5 @@
-# expects python -m nkukuleko.predict --config tests/exp_emodb_predict.ini
+# expects python3 -m nkululeko.predict --config tests/exp_emodb_predict.ini
+# update pydantic and typing_extensions if ImportError happens
 [EXP]
 root = ./tests/results/
 name = explored