Skip to content

Commit

Permalink
Merge branch 'master' of github.com:bagustris/nkululeko
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris committed May 8, 2024
2 parents 5fe6bc8 + 19fc688 commit 5ad31fd
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
12 changes: 12 additions & 0 deletions data/jnv/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# JNV Corpus

Pre-processing the JNV Corpus for Nkululeko (CSV format).

```bash
wget https://ss-takashi.sakura.ne.jp/corpus/jnv/jnv_corpus_ver2.zip
unzip jnv_corpus_ver2.zip
cd ../..
# the following will resample the JNV corpus to 16 kHz, replacing the original files
python3 -m nkululeko.resample --config data/jnv/exp.ini
python3 -m nkululeko.nkululeko --config data/jnv/exp.ini
```
42 changes: 42 additions & 0 deletions data/jnv/process_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# process_database.py: pre-processing script for JNV database

import argparse
import json
import os
import sys

import numpy as np
import pandas as pd
from tqdm import tqdm


def read_audio_files(data_dir):
    """Collect every ``.wav`` file below *data_dir* with its emotion label.

    The label is the second underscore-separated field of the file name
    without its extension, e.g. ``F1_anger_01.wav`` yields ``anger``.

    Args:
        data_dir: Root directory; it is walked recursively.

    Returns:
        A ``pandas.DataFrame`` with the columns ``file`` (path built from
        the walked directory and the file name) and ``emotion``. Rows are
        sorted by ``file``. When no ``.wav`` file is found the frame is
        empty but still carries both columns.

    Raises:
        IndexError: If a ``.wav`` file name contains no underscore.
    """
    data = []
    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith(".wav"):
                continue
            # Drop the extension first so a name with a single underscore
            # ("M1_happy.wav") is labelled "happy" rather than "happy.wav".
            stem = os.path.splitext(file)[0]
            emotion = stem.split("_")[1]
            data.append({"file": os.path.join(root, file), "emotion": emotion})

    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting makes the row order (and any seeded shuffle applied to it by a
    # caller) reproducible from one machine to the next.
    data.sort(key=lambda record: record["file"])

    # Naming the columns keeps the schema stable even when `data` is empty.
    return pd.DataFrame(data, columns=["file", "emotion"])

def main(args):
    """Build the JNV file/emotion table and write it plus its splits as CSV.

    The table returned by ``read_audio_files(args.data_dir)`` is written
    unshuffled to ``args.output_file``. It is then shuffled with a fixed
    seed and cut 80/10/10 into ``jnv_train.csv``, ``jnv_dev.csv`` and
    ``jnv_test.csv`` in the current working directory.

    Args:
        args: Namespace providing ``data_dir`` (directory to scan) and
            ``output_file`` (CSV path for the complete table).
    """
    df = read_audio_files(args.data_dir)

    # --output_file used to be parsed and then ignored; write the complete
    # table there so the option does what its help text says.
    df.to_csv(args.output_file, index=False)

    # Fixed seed so repeated runs over the same table give the same splits.
    shuffled = df.sample(frac=1, random_state=42)

    # Same cut points as before. Positional slicing replaces np.split, which
    # on a DataFrame goes through the deprecated DataFrame.swapaxes.
    train_end = int(.8 * len(df))
    dev_end = int(.9 * len(df))
    train_df = shuffled.iloc[:train_end]
    dev_df = shuffled.iloc[train_end:dev_end]
    test_df = shuffled.iloc[dev_end:]

    train_df.to_csv("jnv_train.csv", index=False)
    dev_df.to_csv("jnv_dev.csv", index=False)
    test_df.to_csv("jnv_test.csv", index=False)


if __name__ == "__main__":
    # Script entry point: declare the two command-line options, parse the
    # command line and hand the resulting namespace to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--data_dir",
        type=str,
        default="jnv_corpus_ver2/JNV/",
        help="Directory containing audio files",
    )
    cli.add_argument(
        "--output_file",
        type=str,
        default="jnv_database.csv",
        help="Output CSV file",
    )

    main(cli.parse_args())

0 comments on commit 5ad31fd

Please sign in to comment.