forked from felixbur/nkululeko
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request felixbur#120 from bagustris/master
add jnv corpus and new test file (run_test2.sh)
- Loading branch information
Showing
5 changed files
with
231 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# JNV Corpus | ||
|
||
Pre-processing the JNV Corpus for Nkululeko (CSV format). | ||
|
||
```bash | ||
wget https://ss-takashi.sakura.ne.jp/corpus/jnv/jnv_corpus_ver2.zip | ||
unzip jnv_corpus_ver2.zip | ||
python3 process_database.py | ||
cd ../.. | ||
# the following resamples the JNV corpus to 16 kHz, replacing the original files | ||
python3 -m nkululeko.resample --config data/jnv/exp.ini | ||
python3 -m nkululeko.nkululeko --config data/jnv/exp.ini | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# process_database.py: pre-processing script for JNV database | ||
|
||
import argparse | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
def read_audio_files(data_dir):
    """Collect every ``.wav`` file below *data_dir* with its emotion label.

    The emotion is the second underscore-separated field of the file name
    without its ``.wav`` suffix (``<first>_<emotion>[_...].wav``).

    Args:
        data_dir: Root directory; it is walked recursively.

    Returns:
        A ``pandas.DataFrame`` with the columns ``file`` (the walked
        directory joined with the file name) and ``emotion``, one row per
        labelled file. Both columns are present even when nothing is found.
    """
    import warnings

    suffix = ".wav"
    records = []
    for root, dirs, files in os.walk(data_dir):
        # os.walk yields entries in an OS-dependent order; sorting (in place
        # for ``dirs`` so the traversal itself is ordered) keeps the row
        # order, and any seeded shuffle applied to it, reproducible.
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(suffix):
                continue
            path = os.path.join(root, name)
            fields = name[:-len(suffix)].split("_")
            if len(fields) < 2:
                # No emotion field in the name: report and skip the file
                # instead of failing the whole run with an IndexError.
                warnings.warn(f"skipping {path}: no emotion field in name")
                continue
            records.append({"file": path, "emotion": fields[1]})

    return pd.DataFrame(records, columns=["file", "emotion"])
|
||
|
||
def main(args):
    """Write shuffled 80/10/10 train/dev/test CSV splits of the corpus.

    The file list returned by ``read_audio_files`` is shuffled with a fixed
    seed (``random_state=42``) and cut at 80 % and 90 % of its length. The
    three parts are written without the index as ``jnv_train.csv``,
    ``jnv_dev.csv`` and ``jnv_test.csv``.

    Args:
        args: Parsed command-line namespace providing ``data_dir`` (root of
            the audio files) and ``output_dir`` (where the CSVs are written).
    """
    data_dir = args.data_dir
    output_dir = args.output_dir

    df = read_audio_files(data_dir)
    shuffled = df.sample(frac=1, random_state=42)

    # Positional slices give the same three parts as np.split(), which on a
    # DataFrame goes through the deprecated DataFrame.swapaxes.
    n_rows = len(shuffled)
    train_end = int(.8 * n_rows)
    dev_end = int(.9 * n_rows)
    splits = {
        "jnv_train.csv": shuffled.iloc[:train_end],
        "jnv_dev.csv": shuffled.iloc[train_end:dev_end],
        "jnv_test.csv": shuffled.iloc[dev_end:],
    }

    # Create the target directory on demand; an empty string means the
    # current directory and must not be passed to makedirs.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for csv_name, part in splits.items():
        part.to_csv(os.path.join(output_dir, csv_name), index=False)
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: both options are optional and default to
    # the unpacked corpus directory and the current directory.
    cli = argparse.ArgumentParser()
    cli.add_argument("--data_dir", type=str, default="jnv_corpus_ver2/JNV/",
                     help="Directory containing audio files")
    cli.add_argument("--output_dir", type=str, default="./",
                     help="Output dir for CSV files")

    main(cli.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#!/bin/bash | ||
|
||
# directory for test files | ||
# test_dir="tests" | ||
|
||
# Display help message
# Prints the usage text to stdout. The script name is taken from $0 so the
# text stays correct whatever the file is called.
function Help {
    local script_name
    script_name=$(basename "$0")

    echo "Usage: $script_name [options]"
    echo "Example: ./$script_name nkululeko"
    echo "Options:"
    echo " nkululeko: test basic nkululeko"
    echo " augment: test augmentation"
    echo " predict: test prediction"
    echo " demo: test demo"
    echo " test: test test module"
    echo " multidb: test multidb"
    echo " explore: test explore module (must be run last)"
    echo " all: test all modules"
    echo " -spotlight: test all modules except spotlight (useful in SSH)"
    echo " --help: display this help message"
}
|
||
# Empty the results directory (creating it if needed) when the first argument
# is "nkululeko", "all" or "-spotlight".
# The three tests must be chained with ||: with ";" before the last test, only
# that last test ("-spotlight") decides whether the branch runs.
# TODO: move root to /tmp so no need to do this
if [ "$1" == "nkululeko" ] || [ "$1" == "all" ] || [ "$1" == "-spotlight" ]; then
    echo "Removing (old) results directory and create if not exist"
    rm -rf tests/results/*
    mkdir -p tests/results
fi
|
||
# Run one test command and report a failure.
# Arguments: the complete command line to execute, passed through unchanged.
# Returns:   0 when the command succeeds; otherwise prints an error line
#            naming the command and returns 1.
function RunTest {
    if ! "$@"; then
        echo "Error: Test failed - $*"
        return 1
    fi
}
|
||
# INI files per module. Each array is named <module>_ini_files because the
# test loop looks the list up by that name; every entry is a file name that
# the loop prefixes with tests/.

# test basic nkululeko
# (the entries after emodb_demo.ini repeat the "test" and "demo" lists below)
nkululeko_ini_files=(
    "exp_emodb_os_praat_xgb.ini"
    "exp_emodb_featimport_xgb.ini"
    "exp_emodb_cnn.ini"
    "exp_emodb_balancing.ini"
    "exp_emodb_split.ini"
    "exp_ravdess_os_xgb.ini"
    "exp_agedb_class_os_xgb.ini"
    "exp_emodb_hubert_xgb.ini"
    "exp_emodb_wavlm_xgb.ini"
    "exp_emodb_whisper_xgb.ini"
    "emodb_demo.ini"
    "exp_emodb_os_xgb_test.ini"
    "exp_emodb_trill_test.ini"
    "exp_emodb_wav2vec2_test.ini"
    "exp_emodb_os_xgb.ini"
    "exp_emodb_os_svm.ini"
    "exp_emodb_os_knn.ini"
    "exp_emodb_os_mlp.ini"
    "exp_agedb_os_xgr.ini"
    "exp_agedb_os_mlp.ini"
)

# test augmentation
augment_ini_files=(
    "exp_emodb_augment_os_xgb.ini"
    "exp_emodb-aug_os_xgb.ini"
    "exp_emodb_random_splice_os_xgb.ini"
    "exp_emodb_rs_os_xgb.ini"
    "emodb_aug_train.ini"
)

# test prediction
predict_ini_files=(
    "exp_emodb_predict.ini"
)

# test demo
demo_ini_files=(
    "exp_emodb_os_xgb.ini"
    "exp_emodb_os_svm.ini"
    "exp_emodb_os_knn.ini"
    "exp_emodb_os_mlp.ini"
    "exp_agedb_os_xgr.ini"
    "exp_agedb_os_mlp.ini"
)

# test test module
test_ini_files=(
    "exp_emodb_os_xgb_test.ini"
    "exp_emodb_trill_test.ini"
    "exp_emodb_wav2vec2_test.ini"
)

# test multidb
multidb_ini_files=(
    "exp_multidb.ini"
)

# test explore module
# Keep the spotlight INI file last: the -spotlight option drops the final
# element of this array.
explore_ini_files=(
    "exp_emodb_explore_data.ini"
    "exp_emodb_explore_featimportance.ini"
    "exp_emodb_explore_scatter.ini"
    "exp_emodb_explore_features.ini"
    "exp_agedb_explore_data.ini"
    "exp_explore.ini" # test spotlight
)
|
||
|
||
# Show the usage text and stop when called without arguments or with --help.
# Without the exit the script would carry on and treat "--help" as the name
# of a module to test.
if [ $# -eq 0 ] || [ "$1" == "--help" ]; then
    Help
    exit 0
fi
|
||
# Wall-clock start (seconds since the epoch) for the final timing report.
start_time=$(date +%s)

# Select the modules to run: every module for "all", every module without the
# spotlight INI file for "-spotlight", otherwise the modules named on the
# command line.
if [ "$1" == "all" ]; then
    modules=(nkululeko augment predict demo test multidb explore)
elif [ "$1" == "-spotlight" ]; then
    modules=(nkululeko augment predict demo test multidb explore)
    # Quoted so the shell cannot glob-expand "[-1]" against file names in the
    # current directory before unset sees it.
    unset 'explore_ini_files[-1]' # Exclude INI file for spotlight
else
    modules=("$@")
fi
|
||
# Run every INI file of every selected module and keep score.
success_count=0
failed_count=0
failed_modules=()
for module in "${modules[@]}"
do
    # A module is valid only if an array named <module>_ini_files exists.
    # Report anything else instead of silently running nothing (or failing
    # in the indirect expansion below on names that are not identifiers).
    if ! declare -p -- "${module}_ini_files" > /dev/null 2>&1; then
        echo "Warning: unknown module '$module' - skipped" >&2
        continue
    fi

    # Indirect expansion: iterate over the array named <module>_ini_files
    ini_files="${module}_ini_files[@]"
    for ini_file in "${!ini_files}"
    do
        test_cmd=(python3 -m "nkululeko.$module" --config "tests/$ini_file")
        # if module is "demo" add "--list" argument
        if [ "$module" == "demo" ]; then
            test_cmd+=(--list "data/test/samples.csv")
        fi

        if RunTest "${test_cmd[@]}"; then
            success_count=$((success_count + 1))
        else
            failed_count=$((failed_count + 1))
            failed_modules+=("$module with $ini_file")
        fi
    done
done
|
||
# Summary: pass/fail totals, the failed module/INI pairs (if any) and the
# elapsed wall-clock time.
# NOTE(review): the last command is an echo, so the script's exit status does
# not reflect failed_count - confirm whether callers need a non-zero status.
printf 'Total tests passed: %s\n' "$success_count"
printf 'Total tests failed: %s\n' "$failed_count"

if (( ${#failed_modules[@]} > 0 )); then
    echo "Failed modules and INI files:"
    # one entry per line
    printf '%s\n' "${failed_modules[@]}"
fi

end_time=$(date +%s)
total_time=$(( end_time - start_time ))

printf 'Total time taken: %s seconds\n' "$total_time"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters