Skip to content

Commit

Permalink
fix jnv flow
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris committed May 8, 2024
1 parent 72ab5aa commit d6cbf57
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
1 change: 1 addition & 0 deletions data/jnv/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Pre-processing the JNV Corpus for Nkululeko (CSV format).
```bash
wget https://ss-takashi.sakura.ne.jp/corpus/jnv/jnv_corpus_ver2.zip
unzip jnv_corpus_ver2.zip
python3 process_database.py
cd ../..
# the following will resample the JNV corpus to 16 kHz, replacing the original files
python3 -m nkululeko.resample --config data/jnv/exp.ini
Expand Down
38 changes: 23 additions & 15 deletions data/jnv/process_database.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
# process_database.py: pre-processing script for JNV database

import argparse
import json
import os
import sys

import numpy as np
import pandas as pd
from tqdm import tqdm


def read_audio_files(data_dir):
Expand All @@ -16,27 +13,38 @@ def read_audio_files(data_dir):
for file in files:
if file.endswith(".wav"):
emotion = file.split("_")[1]
data.append({"file": os.path.join(root, file), "emotion": emotion})

data.append({"file": os.path.join(
root, file), "emotion": emotion})

df = pd.DataFrame(data)
return df


def main(args):
    """Split the JNV audio file list into train/dev/test CSV files.

    The table returned by ``read_audio_files`` is shuffled with a fixed seed
    and cut 80/10/10 into ``jnv_train.csv``, ``jnv_dev.csv`` and
    ``jnv_test.csv``, all written to ``args.output_dir``.

    Args:
        args: Parsed command-line namespace providing ``data_dir`` (directory
            passed to ``read_audio_files``) and ``output_dir`` (directory the
            three CSV files are written to).
    """
    data_dir = args.data_dir
    output_dir = args.output_dir

    df = read_audio_files(data_dir)

    # Fixed seed keeps the split reproducible between runs.
    shuffled = df.sample(frac=1, random_state=42)
    train_end = int(.8 * len(shuffled))
    dev_end = int(.9 * len(shuffled))

    # Positional slices yield the same partitions as
    # np.split(shuffled, [train_end, dev_end]) without going through the
    # deprecated DataFrame.swapaxes path that np.split triggers.
    splits = {
        "jnv_train.csv": shuffled.iloc[:train_end],
        "jnv_dev.csv": shuffled.iloc[train_end:dev_end],
        "jnv_test.csv": shuffled.iloc[dev_end:],
    }

    # Create the target directory up front so a fresh --output_dir works;
    # an empty string means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for filename, part in splits.items():
        part.to_csv(os.path.join(output_dir, filename), index=False)


if __name__ == "__main__":
    # Each option is registered exactly once; argparse rejects duplicate
    # option strings.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        type=str,
        default="jnv_corpus_ver2/JNV/",
        help="Directory containing audio files")
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Output dir for CSV files")
    args = parser.parse_args()

    main(args)

0 comments on commit d6cbf57

Please sign in to comment.