forked from felixbur/nkululeko
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
150 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Nkululeko pre-processing for KBES dataset | ||
|
||
Download the dataset from [1] and place it in this directory, or store it elsewhere and | ||
create a soft link to it here (`ln -sf`). | ||
|
||
```bash | ||
# unzip the dataset | ||
unzip "KUET Bangla Emotional Speech (KBES) Dataset.zip" | ||
|
||
``` | ||
|
||
Reference: | ||
[1] <https://data.mendeley.com/datasets/vsn37ps3rx/4> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[EXP] | ||
root = /tmp/results/ | ||
name = exp_kbes_hubert_all | ||
[DATA] | ||
databases = ['train', 'dev', 'test'] | ||
train = ./data/kbes/kbes_train.csv | ||
train.type = csv | ||
train.absolute_path = False | ||
train.split_strategy = train | ||
dev = ./data/kbes/kbes_dev.csv | ||
dev.type = csv | ||
dev.absolute_path = False | ||
dev.split_strategy = train | ||
test = ./data/kbes/kbes_test.csv | ||
test.type = csv | ||
test.absolute_path = False | ||
test.split_strategy = test | ||
target = emotion | ||
; labels = ['anger', 'fear', 'sad', 'happy'] | ||
; get the number of classes from the target column automatically | ||
[FEATS] | ||
type = ['hubert-xlarge-ll60k'] | ||
; no_reuse = False | ||
scale = standard | ||
[MODEL] | ||
type = knn | ||
; save = True | ||
[RESAMPLE] | ||
replace = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# process_database.py for KBES dataset | ||
|
||
import pandas as pd | ||
import argparse | ||
from nkululeko.utils.files import find_files | ||
import os | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
# Numeric codes found in the first three "-"-separated fields of a KBES
# file name, in this order: emotion, intensity, gender.
# emotion: 1 = Neutral, 2 = Happy, 3 = Sad, 4 = Angry, 5 = Disgust
_EMOTION_MAPPING = {
    1: "neutral",
    2: "happy",
    3: "sad",
    4: "angry",
    5: "disgust",
}

# intensity: 1 = low, 2 = high
_INTENSITY_MAPPING = {
    1: "low",
    2: "high",
}

# gender: 0 = female, 1 = male
_GENDER_MAPPING = {
    0: "female",
    1: "male",
}


def _parse_kbes_filename(wav):
    """Build one metadata row from the numeric codes in a KBES file name.

    Args:
        wav: Path of a wav file; only its basename is inspected.

    Returns:
        A dict with the keys ``file``, ``emotion``, ``gender``,
        ``intensity`` and ``language`` (always ``"bangla"``).

    Raises:
        ValueError: If one of the first three "-"-separated fields of the
            basename is missing, not an integer, or not a known code.
    """
    basename = os.path.basename(wav)
    # Split once and reuse the fields instead of re-splitting per label.
    fields = basename.split("-")
    try:
        emotion = _EMOTION_MAPPING[int(fields[0])]
        intensity = _INTENSITY_MAPPING[int(fields[1])]
        gender = _GENDER_MAPPING[int(fields[2])]
    except (IndexError, KeyError, ValueError) as err:
        # Name the offending file so a stray or misnamed wav is easy to find.
        raise ValueError(
            f"Cannot parse KBES labels from file name '{basename}': the "
            "first three '-'-separated fields must be numeric emotion, "
            "intensity and gender codes."
        ) from err
    # Key order defines the column order of the written CSV files.
    return {
        "file": wav,
        "emotion": emotion,
        "gender": gender,
        "intensity": intensity,
        "language": "bangla",
    }


def process_database(data_dir, output_dir):
    """Create train/dev/test CSV files for the KBES dataset.

    Collects the wav files found by ``find_files`` under ``data_dir``,
    derives emotion, intensity and gender from each file name, splits the
    rows 70/15/15 into train/dev/test stratified by emotion (fixed
    ``random_state=42``), and writes ``kbes_train.csv``, ``kbes_dev.csv``
    and ``kbes_test.csv`` into ``output_dir``.

    Args:
        data_dir: Directory containing the KBES wav files.
        output_dir: Directory for the CSV files; created if missing.

    Raises:
        FileNotFoundError: If ``data_dir`` is not an existing directory or
            no wav files are found in it.
        ValueError: If a wav file name does not follow the KBES coding.
    """
    # isdir (not exists): a regular file is not a usable data directory.
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"Directory {data_dir} not found.")

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # read all wav files
    wavs = find_files(data_dir, relative=True, ext=["wav"])
    print(f"Found {len(wavs)} wav files.")
    # len() rather than truthiness: the container type returned by
    # find_files is not visible here. Without this guard an empty result
    # fails later with an opaque KeyError on the 'emotion' column.
    if len(wavs) == 0:
        raise FileNotFoundError(f"No wav files found in {data_dir}.")

    # create dataframe from the per-file metadata rows
    df = pd.DataFrame([_parse_kbes_filename(wav) for wav in wavs])

    # split the data into train, dev, and test sets, balanced by emotion:
    # 30% is held out first, then halved into dev and test
    train_df, temp_df = train_test_split(
        df, test_size=0.3, stratify=df["emotion"], random_state=42)
    dev_df, test_df = train_test_split(
        temp_df, test_size=0.5, stratify=temp_df["emotion"], random_state=42)

    # write dataframes to csv
    train_df.to_csv(os.path.join(output_dir, "kbes_train.csv"), index=False)
    dev_df.to_csv(os.path.join(output_dir, "kbes_dev.csv"), index=False)
    test_df.to_csv(os.path.join(output_dir, "kbes_test.csv"), index=False)

    print(f"Number of train samples: {len(train_df)}")
    print(f"Number of dev samples: {len(dev_df)}")
    print(f"Number of test samples: {len(test_df)}")
    print("Database processing completed.")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: read the two optional path arguments and run the
    # KBES conversion with them.
    cli = argparse.ArgumentParser(description="Process KBES dataset")

    # Both options are plain strings; only default and help text differ.
    cli_options = {
        "--data_dir": {
            "default": "KUET Bangla Emotional Speech (KBES) Dataset",
            "help": "Directory containing the KBES data",
        },
        "--output_dir": {
            "default": "./",
            "help": "Directory to store the output CSV files",
        },
    }
    for flag, spec in cli_options.items():
        cli.add_argument(flag, type=str, **spec)

    options = cli.parse_args()
    process_database(options.data_dir, options.output_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters