Merge branch 'felixbur:main' into add-db

bagustris · May 15, 2024 · 75d94de · 75d94de
2 parents 8ad188f + 201ddf9
commit 75d94de
Show file tree

Hide file tree

Showing 5 changed files with 143 additions and 91 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 Changelog
 =========
 
+Version 0.84.1
+--------------
+* made the resample module usable without a config file (single audio files via `--file`, optional `--replace`)
+
 Version 0.84.0
 --------------
 * added SHAP analysis

diff --git a/nkululeko/augmenting/resampler.py b/nkululeko/augmenting/resampler.py
@@ -12,16 +12,19 @@
 
 
 class Resampler:
-    def __init__(self, df, not_testing=True):
+    def __init__(self, df, replace, not_testing=True):
         self.SAMPLING_RATE = 16000
         self.df = df
         self.util = Util("resampler", has_config=not_testing)
         self.util.warn(f"all files might be resampled to {self.SAMPLING_RATE}")
         self.not_testing = not_testing
+        self.replace = eval(self.util.config_val(
+            "RESAMPLE", "replace", "False")) if not not_testing else replace
 
     def resample(self):
         files = self.df.index.get_level_values(0).values
-        replace = eval(self.util.config_val("RESAMPLE", "replace", "False"))
+        # replace = eval(self.util.config_val("RESAMPLE", "replace", "False"))
+        replace = self.replace
         if self.not_testing:
             store = self.util.get_path("store")
         else:
@@ -42,7 +45,8 @@ def resample(self):
                 continue
             if org_sr != self.SAMPLING_RATE:
                 self.util.debug(f"resampling {f} (sr = {org_sr})")
-                resampler = torchaudio.transforms.Resample(org_sr, self.SAMPLING_RATE)
+                resampler = torchaudio.transforms.Resample(
+                    org_sr, self.SAMPLING_RATE)
                 signal = resampler(signal)
                 if replace:
                     torchaudio.save(
@@ -59,7 +63,8 @@ def resample(self):
             self.df = self.df.set_index(
                 self.df.index.set_levels(new_files, level="file")
             )
-            target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
+            target_file = self.util.config_val(
+                "RESAMPLE", "target", "resampled.csv")
             # remove encoded labels
             target = self.util.config_val("DATA", "target", "emotion")
             if "class_label" in self.df.columns:

diff --git a/nkululeko/constants.py b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.84.0"
+VERSION="0.84.1"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/resample.py b/nkululeko/resample.py
@@ -1,78 +1,100 @@
 # resample.py
-# change the sampling rate for train and test splits
+# change the sampling rate of a single audio file, or of the samples selected via an INI file (train, test, all)
 
 import argparse
 import configparser
 import os
-
 import pandas as pd
-
+import audformat
 from nkululeko.augmenting.resampler import Resampler
+from nkululeko.utils.util import Util
+
 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
-from nkululeko.utils.util import Util
 
 
 def main(src_dir):
     parser = argparse.ArgumentParser(
-        description="Call the nkululeko RESAMPLE  framework.")
-    parser.add_argument("--config", default="exp.ini",
+        description="Call the nkululeko RESAMPLE framework.")
+    parser.add_argument("--config", default=None,
                         help="The base configuration")
+    parser.add_argument("--file", default=None,
+                        help="The input audio file to resample")
+    parser.add_argument("--replace", action="store_true",
+                        help="Replace the original audio file")
+
     args = parser.parse_args()
-    if args.config is not None:
-        config_file = args.config
-    else:
-        config_file = f"{src_dir}/exp.ini"
 
-    # test if the configuration file exists
-    if not os.path.isfile(config_file):
-        print(f"ERROR: no such file: {config_file}")
+    if args.file is None and args.config is None:
+        print("ERROR: Either --file or --config argument must be provided.")
         exit()
 
-    # load one configuration per experiment
-    config = configparser.ConfigParser()
-    config.read(config_file)
-    # create a new experiment
-    expr = Experiment(config)
-    module = "resample"
-    expr.set_module(module)
-    util = Util(module)
-    util.debug(
-        f"running {expr.name} from config {config_file}, nkululeko version"
-        f" {VERSION}"
-    )
-
-    if util.config_val("EXP", "no_warnings", False):
-        import warnings
-
-        warnings.filterwarnings("ignore")
-
-    # load the data
-    expr.load_datasets()
-
-    # split into train and test
-    expr.fill_train_and_tests()
-    util.debug(
-        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
-
-    sample_selection = util.config_val("RESAMPLE", "sample_selection", "all")
-    if sample_selection == "all":
-        df = pd.concat([expr.df_train, expr.df_test])
-    elif sample_selection == "train":
-        df = expr.df_train
-    elif sample_selection == "test":
-        df = expr.df_test
+    if args.file is not None:
+        # Load the audio file into a DataFrame
+        files = pd.Series([args.file])
+        df_sample = pd.DataFrame(index=files)
+        df_sample.index = audformat.utils.to_segmented_index(
+            df_sample.index, allow_nat=False
+        )
+
+        # Resample the audio file
+        util = Util("resampler", has_config=False)
+        util.debug(f"Resampling audio file: {args.file}")
+        rs = Resampler(df_sample, not_testing=True, replace=args.replace)
+        rs.resample()
     else:
-        util.error(
-            f"unknown selection specifier {sample_selection}, should be [all |"
-            " train | test]"
+        # No --file given: resample the samples described by the INI configuration file
+        config_file = args.config
+
+        # Test if the configuration file exists
+        if not os.path.isfile(config_file):
+            print(f"ERROR: no such file: {config_file}")
+            exit()
+
+        # Load one configuration per experiment
+        config = configparser.ConfigParser()
+        config.read(config_file)
+        # Create a new experiment
+        expr = Experiment(config)
+        module = "resample"
+        expr.set_module(module)
+        util = Util(module)
+        util.debug(
+            f"running {expr.name} from config {config_file}, nkululeko version"
+            f" {VERSION}"
         )
-    util.debug(f"resampling {sample_selection}: {df.shape[0]} samples")
-    rs = Resampler(df)
-    rs.resample()
-    print("DONE")
+
+        if util.config_val("EXP", "no_warnings", False):
+            import warnings
+            warnings.filterwarnings("ignore")
+
+        # Load the data
+        expr.load_datasets()
+
+        # Split into train and test
+        expr.fill_train_and_tests()
+        util.debug(
+            f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
+
+        sample_selection = util.config_val(
+            "RESAMPLE", "sample_selection", "all")
+        if sample_selection == "all":
+            df = pd.concat([expr.df_train, expr.df_test])
+        elif sample_selection == "train":
+            df = expr.df_train
+        elif sample_selection == "test":
+            df = expr.df_test
+        else:
+            util.error(
+                f"unknown selection specifier {sample_selection}, should be [all |"
+                " train | test]"
+            )
+        util.debug(f"resampling {sample_selection}: {df.shape[0]} samples")
+        replace = util.config_val("RESAMPLE", "replace", "False")
+        rs = Resampler(df, replace=replace)
+        rs.resample()
 
 
 if __name__ == "__main__":
     cwd = os.path.dirname(os.path.abspath(__file__))
-    main(cwd)  # use this if you want to state the config file path on command line
+    main(cwd)
diff --git a/nkululeko/utils/util.py b/nkululeko/utils/util.py
@@ -33,43 +33,58 @@ def __init__(self, caller=None, has_config=True):
         else:
             self.caller = ""
         if has_config:
-            import nkululeko.glob_conf as glob_conf
-
-            self.config = glob_conf.config
-            self.got_data_roots = self.config_val("DATA", "root_folders", False)
-            if self.got_data_roots:
-                # if there is a global data rootfolder file, read from there
-                if not os.path.isfile(self.got_data_roots):
-                    self.error(f"no such file: {self.got_data_roots}")
-                self.data_roots = configparser.ConfigParser()
-                self.data_roots.read(self.got_data_roots)
-                # self.debug(f"getting data roots from {self.got_data_roots}")
+            try:
+                import nkululeko.glob_conf as glob_conf
+                self.config = glob_conf.config
+                self.got_data_roots = self.config_val(
+                    "DATA", "root_folders", False)
+                if self.got_data_roots:
+                    # if there is a global data rootfolder file, read from there
+                    if not os.path.isfile(self.got_data_roots):
+                        self.error(f"no such file: {self.got_data_roots}")
+                    self.data_roots = configparser.ConfigParser()
+                    self.data_roots.read(self.got_data_roots)
+            except (ModuleNotFoundError, AttributeError):
+                self.config = None
+                self.got_data_roots = False
 
     def get_path(self, entry):
         """
         This method allows the user to get the directory path for the given argument.
         """
-        root = os.path.join(self.config["EXP"]["root"], "")
-        name = self.config["EXP"]["name"]
-        try:
-            entryn = self.config["EXP"][entry]
-        except KeyError:
-            # some default values
+        if self.config is None:
+            # If no configuration file is provided, use default paths
             if entry == "fig_dir":
-                entryn = "./images/"
+                dir_name = "./images/"
             elif entry == "res_dir":
-                entryn = "./results/"
+                dir_name = "./results/"
             elif entry == "model_dir":
-                entryn = "./models/"
+                dir_name = "./models/"
             else:
-                entryn = "./store/"
-
-        # Expand image, model and result directories with run index
-        if entry == "fig_dir" or entry == "res_dir" or entry == "model_dir":
-            run = self.config_val("EXP", "run", 0)
-            entryn = entryn + f"run_{run}/"
+                dir_name = "./store/"
+        else:
+            root = os.path.join(self.config["EXP"]["root"], "")
+            name = self.config["EXP"]["name"]
+            try:
+                entryn = self.config["EXP"][entry]
+            except KeyError:
+                # some default values
+                if entry == "fig_dir":
+                    entryn = "./images/"
+                elif entry == "res_dir":
+                    entryn = "./results/"
+                elif entry == "model_dir":
+                    entryn = "./models/"
+                else:
+                    entryn = "./store/"
+
+            # Expand image, model and result directories with run index
+            if entry == "fig_dir" or entry == "res_dir" or entry == "model_dir":
+                run = self.config_val("EXP", "run", 0)
+                entryn = entryn + f"run_{run}/"
+
+            dir_name = f"{root}{name}/{entryn}"
 
-        dir_name = f"{root}{name}/{entryn}"
         audeer.mkdir(dir_name)
         return dir_name
 
@@ -101,7 +116,8 @@ def config_val_data(self, dataset, key, default):
                         )
                     return default
             if not default in self.stopvals:
-                self.debug(f"value for {key} not found, using default: {default}")
+                self.debug(
+                    f"value for {key} not found, using default: {default}")
             return default
 
     def set_config(self, config):
@@ -138,7 +154,8 @@ def make_segmented_index(self, df):
         if len(df) == 0:
             return df
         if not isinstance(df.index, pd.MultiIndex):
-            df.index = audformat.utils.to_segmented_index(df.index, allow_nat=False)
+            df.index = audformat.utils.to_segmented_index(
+                df.index, allow_nat=False)
         return df
 
     def _get_value_descript(self, section, name):
@@ -243,19 +260,23 @@ def check_df(self, i, df):
         print(df.head(1))
 
     def config_val(self, section, key, default):
+        if self.config is None:
+            return default
         try:
             return self.config[section][key]
         except KeyError:
-            if not default in self.stopvals:
-                self.debug(f"value for {key} not found, using default: {default}")
+            if default not in self.stopvals:
+                self.debug(
+                    f"value for {key} not found, using default: {default}")
             return default
 
     def config_val_list(self, section, key, default):
         try:
             return ast.literal_eval(self.config[section][key])
         except KeyError:
             if not default in self.stopvals:
-                self.debug(f"value for {key} not found, using default: {default}")
+                self.debug(
+                    f"value for {key} not found, using default: {default}")
             return default
 
     def continuous_to_categorical(self, series):