Skip to content

Commit

Permalink
Merge branch 'felixbur:main' into add-db
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris authored May 15, 2024
2 parents 8ad188f + 201ddf9 commit 75d94de
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 91 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

Version 0.84.1
--------------
* made resample independent of config file

Version 0.84.0
--------------
* added SHAP analysis
Expand Down
13 changes: 9 additions & 4 deletions nkululeko/augmenting/resampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@


class Resampler:
def __init__(self, df, replace=False, not_testing=True):
    """Set up a resampler for the audio files referenced by ``df``.

    Args:
        df: dataframe whose index holds the audio file paths.
        replace: whether resampled audio should overwrite the original
            files. Accepts a bool or a string such as "True"/"False"
            (config values arrive as strings). Defaults to False so that
            callers written before this parameter existed keep working.
        not_testing: True for regular runs; False when run without a
            configuration (e.g. from tests).
    """
    self.SAMPLING_RATE = 16000
    self.df = df
    self.util = Util("resampler", has_config=not_testing)
    self.util.warn(f"all files might be resampled to {self.SAMPLING_RATE}")
    self.not_testing = not_testing
    # NOTE(review): as in the original, the explicit ``replace`` argument
    # is only honoured when not_testing is True; otherwise the value is
    # looked up under RESAMPLE.replace (default "False") - confirm intended.
    if not_testing:
        raw_replace = replace
    else:
        raw_replace = self.util.config_val("RESAMPLE", "replace", "False")
    # Parse strings explicitly instead of eval(): a non-empty string such
    # as "False" is truthy and would otherwise overwrite the original files.
    if isinstance(raw_replace, str):
        raw_replace = raw_replace.strip().lower() in ("true", "1", "yes", "on")
    self.replace = bool(raw_replace)

def resample(self):
files = self.df.index.get_level_values(0).values
replace = eval(self.util.config_val("RESAMPLE", "replace", "False"))
# replace = eval(self.util.config_val("RESAMPLE", "replace", "False"))
replace = self.replace
if self.not_testing:
store = self.util.get_path("store")
else:
Expand All @@ -42,7 +45,8 @@ def resample(self):
continue
if org_sr != self.SAMPLING_RATE:
self.util.debug(f"resampling {f} (sr = {org_sr})")
resampler = torchaudio.transforms.Resample(org_sr, self.SAMPLING_RATE)
resampler = torchaudio.transforms.Resample(
org_sr, self.SAMPLING_RATE)
signal = resampler(signal)
if replace:
torchaudio.save(
Expand All @@ -59,7 +63,8 @@ def resample(self):
self.df = self.df.set_index(
self.df.index.set_levels(new_files, level="file")
)
target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
target_file = self.util.config_val(
"RESAMPLE", "target", "resampled.csv")
# remove encoded labels
target = self.util.config_val("DATA", "target", "emotion")
if "class_label" in self.df.columns:
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.84.0"
VERSION="0.84.1"
SAMPLING_RATE = 16000
130 changes: 76 additions & 54 deletions nkululeko/resample.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,100 @@
# resample.py
# change the sampling rate for train and test splits
# change the sampling rate for audio file or INI file (train, test, all)

import argparse
import configparser
import os

import pandas as pd

import audformat
from nkululeko.augmenting.resampler import Resampler
from nkululeko.utils.util import Util

from nkululeko.constants import VERSION
from nkululeko.experiment import Experiment
from nkululeko.utils.util import Util


def _parse_bool(value):
    """Interpret a config value (bool or string like "True"/"false") as bool."""
    if isinstance(value, str):
        # Same truthy spellings that configparser's getboolean() accepts.
        return value.strip().lower() in ("true", "1", "yes", "on")
    return bool(value)


def main(src_dir):
    """Resample a single audio file (--file) or the data of an INI file (--config).

    Args:
        src_dir: kept for backward compatibility; not read by this function.

    Raises:
        SystemExit: with status 1 if neither --file nor --config is given,
            or if the given configuration file does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Call the nkululeko RESAMPLE framework.")
    parser.add_argument("--config", default=None,
                        help="The base configuration")
    parser.add_argument("--file", default=None,
                        help="The input audio file to resample")
    parser.add_argument("--replace", action="store_true",
                        help="Replace the original audio file")
    args = parser.parse_args()

    if args.file is None and args.config is None:
        print("ERROR: Either --file or --config argument must be provided.")
        # Non-zero status so that calling scripts can detect the failure.
        raise SystemExit(1)

    if args.file is not None:
        # Wrap the single audio file in a dataframe with a segmented index
        files = pd.Series([args.file])
        df_sample = pd.DataFrame(index=files)
        df_sample.index = audformat.utils.to_segmented_index(
            df_sample.index, allow_nat=False
        )

        # Resample the audio file
        util = Util("resampler", has_config=False)
        util.debug(f"Resampling audio file: {args.file}")
        rs = Resampler(df_sample, not_testing=True, replace=args.replace)
        rs.resample()
        return

    # INI file mode
    config_file = args.config

    # Test if the configuration file exists
    if not os.path.isfile(config_file):
        print(f"ERROR: no such file: {config_file}")
        raise SystemExit(1)

    # Load one configuration per experiment
    config = configparser.ConfigParser()
    config.read(config_file)
    # Create a new experiment
    expr = Experiment(config)
    module = "resample"
    expr.set_module(module)
    util = Util(module)
    util.debug(
        f"running {expr.name} from config {config_file}, nkululeko version"
        f" {VERSION}"
    )

    # Config values are strings, so "False" must be parsed rather than
    # tested for truthiness.
    if _parse_bool(util.config_val("EXP", "no_warnings", False)):
        import warnings
        warnings.filterwarnings("ignore")

    # Load the data
    expr.load_datasets()

    # Split into train and test
    expr.fill_train_and_tests()
    util.debug(
        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")

    sample_selection = util.config_val("RESAMPLE", "sample_selection", "all")
    if sample_selection == "all":
        df = pd.concat([expr.df_train, expr.df_test])
    elif sample_selection == "train":
        df = expr.df_train
    elif sample_selection == "test":
        df = expr.df_test
    else:
        util.error(
            f"unknown selection specifier {sample_selection}, should be [all |"
            " train | test]"
        )
        # presumably util.error() terminates the program; return anyway so
        # the undefined ``df`` is never used - TODO confirm
        return
    util.debug(f"resampling {sample_selection}: {df.shape[0]} samples")
    # Without parsing, the string "False" is truthy and the original files
    # would always be overwritten.
    replace = _parse_bool(util.config_val("RESAMPLE", "replace", "False"))
    rs = Resampler(df, replace=replace)
    rs.resample()


if __name__ == "__main__":
    # main() takes the directory of this script as its only argument;
    # the input file or configuration is read from the command line.
    cwd = os.path.dirname(os.path.abspath(__file__))
    main(cwd)
85 changes: 53 additions & 32 deletions nkululeko/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,43 +33,58 @@ def __init__(self, caller=None, has_config=True):
else:
self.caller = ""
if has_config:
import nkululeko.glob_conf as glob_conf

self.config = glob_conf.config
self.got_data_roots = self.config_val("DATA", "root_folders", False)
if self.got_data_roots:
# if there is a global data rootfolder file, read from there
if not os.path.isfile(self.got_data_roots):
self.error(f"no such file: {self.got_data_roots}")
self.data_roots = configparser.ConfigParser()
self.data_roots.read(self.got_data_roots)
# self.debug(f"getting data roots from {self.got_data_roots}")
try:
import nkululeko.glob_conf as glob_conf
self.config = glob_conf.config
self.got_data_roots = self.config_val(
"DATA", "root_folders", False)
if self.got_data_roots:
# if there is a global data rootfolder file, read from there
if not os.path.isfile(self.got_data_roots):
self.error(f"no such file: {self.got_data_roots}")
self.data_roots = configparser.ConfigParser()
self.data_roots.read(self.got_data_roots)
except (ModuleNotFoundError, AttributeError):
self.config = None
self.got_data_roots = False

def get_path(self, entry):
    """Return the directory path for ``entry`` and create it if missing.

    Without a configuration (``self.config`` is None) a fixed relative
    default is returned. Otherwise the path is
    ``<EXP.root>/<EXP.name>/<EXP.entry or default>``, and for "fig_dir",
    "res_dir" and "model_dir" a ``run_<EXP.run>/`` sub-directory is appended.

    Args:
        entry: key naming the directory, e.g. "fig_dir", "res_dir",
            "model_dir" or "store".

    Returns:
        The directory path as a string.

    Raises:
        KeyError: if a configuration is present but lacks EXP.root or
            EXP.name.
    """
    # Defaults for the run-specific directories; any other entry falls
    # back to the store directory.
    default_dirs = {
        "fig_dir": "./images/",
        "res_dir": "./results/",
        "model_dir": "./models/",
    }
    fallback = default_dirs.get(entry, "./store/")

    if self.config is None:
        # If no configuration file is provided, use default paths
        dir_name = fallback
    else:
        root = os.path.join(self.config["EXP"]["root"], "")
        name = self.config["EXP"]["name"]
        try:
            entryn = self.config["EXP"][entry]
        except KeyError:
            entryn = fallback

        # Expand image, model and result directories with run index
        if entry in default_dirs:
            run = self.config_val("EXP", "run", 0)
            entryn = entryn + f"run_{run}/"

        dir_name = f"{root}{name}/{entryn}"

    audeer.mkdir(dir_name)
    return dir_name

Expand Down Expand Up @@ -101,7 +116,8 @@ def config_val_data(self, dataset, key, default):
)
return default
if not default in self.stopvals:
self.debug(f"value for {key} not found, using default: {default}")
self.debug(
f"value for {key} not found, using default: {default}")
return default

def set_config(self, config):
Expand Down Expand Up @@ -138,7 +154,8 @@ def make_segmented_index(self, df):
if len(df) == 0:
return df
if not isinstance(df.index, pd.MultiIndex):
df.index = audformat.utils.to_segmented_index(df.index, allow_nat=False)
df.index = audformat.utils.to_segmented_index(
df.index, allow_nat=False)
return df

def _get_value_descript(self, section, name):
Expand Down Expand Up @@ -243,19 +260,23 @@ def check_df(self, i, df):
print(df.head(1))

def config_val(self, section, key, default):
    """Return the configuration value at ``[section][key]``, or ``default``.

    Args:
        section: name of the configuration section.
        key: option name inside the section.
        default: value returned when no configuration is loaded or the
            section/key is missing.

    Returns:
        The stored value, or ``default``. A debug message is logged when
        the default is used for a missing key, unless the default is one
        of ``self.stopvals``.
    """
    # getattr: tolerate instances on which no config attribute was set.
    config = getattr(self, "config", None)
    if config is None:
        return default
    try:
        return config[section][key]
    except KeyError:
        if default not in self.stopvals:
            self.debug(
                f"value for {key} not found, using default: {default}")
        return default

def config_val_list(self, section, key, default):
    """Return the value at ``[section][key]`` parsed as a Python literal.

    Args:
        section: name of the configuration section.
        key: option name inside the section.
        default: value returned when no configuration is loaded or the
            section/key is missing.

    Returns:
        The stored string evaluated with ``ast.literal_eval`` (e.g. a
        list), or ``default``. A debug message is logged when the default
        is used for a missing key, unless the default is one of
        ``self.stopvals``.
    """
    # Same guard as config_val: without a loaded configuration the
    # subscript below would raise TypeError instead of using the default.
    config = getattr(self, "config", None)
    if config is None:
        return default
    try:
        return ast.literal_eval(config[section][key])
    except KeyError:
        if default not in self.stopvals:
            self.debug(
                f"value for {key} not found, using default: {default}")
        return default

def continuous_to_categorical(self, series):
Expand Down

0 comments on commit 75d94de

Please sign in to comment.