Skip to content

Commit

Permalink
modified: nkululeko/feat_extract/feats_agender.py
Browse files Browse the repository at this point in the history
	modified:   nkululeko/feat_extract/feats_auddim.py
	modified:   nkululeko/feat_extract/feats_audmodel.py
	modified:   nkululeko/feat_extract/feats_clap.py
	modified:   nkululeko/feat_extract/feats_hubert.py
	modified:   nkululeko/feat_extract/feats_oxbow.py
	modified:   nkululeko/feat_extract/feats_praat.py
	modified:   nkululeko/feat_extract/feats_trill.py
	modified:   nkululeko/feat_extract/feats_wav2vec2.py
	modified:   nkululeko/feat_extract/featureset.py
	modified:   nkululeko/feature_extractor.py
  • Loading branch information
bagustris committed Apr 23, 2024
1 parent a7a05cd commit 5c03a72
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 53 deletions.
10 changes: 6 additions & 4 deletions nkululeko/feat_extract/feats_agender.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@
import audinterface


class AudModelAgenderSet(Featureset):
class AgenderSet(Featureset):
"""
Embeddings from the wav2vec2. based model finetuned on agender data, described in the paper
"Speech-based Age and Gender Prediction with Transformers"
https://arxiv.org/abs/2306.16962
"""

def __init__(self, name, data_df):
super().__init__(name, data_df)
def __init__(self, name, data_df, feats_type):
super().__init__(name, data_df, feats_type)
self.model_loaded = False
self.feats_type = feats_type

def _load_model(self):
model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
Expand All @@ -28,7 +29,8 @@ def _load_model(self):
if not os.path.isdir(model_root):
cache_root = audeer.mkdir("cache")
model_root = audeer.mkdir(model_root)
archive_path = audeer.download_url(model_url, cache_root, verbose=True)
archive_path = audeer.download_url(
model_url, cache_root, verbose=True)
audeer.extract_archive(archive_path, model_root)
device = self.util.config_val("MODEL", "device", "cpu")
self.model = audonnx.load(model_root, device=device)
Expand Down
8 changes: 5 additions & 3 deletions nkululeko/feat_extract/feats_auddim.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,19 @@ class AuddimSet(Featureset):
https://arxiv.org/abs/2203.07378.
"""

def __init__(self, name, data_df):
super().__init__(name, data_df)
def __init__(self, name, data_df, feats_type):
super().__init__(name, data_df, feats_type)
self.model_loaded = False
self.feats_types = feats_type

def _load_model(self):
model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
model_root = self.util.config_val("FEATS", "aud.model", "./audmodel/")
if not os.path.isdir(model_root):
cache_root = audeer.mkdir("cache")
model_root = audeer.mkdir(model_root)
archive_path = audeer.download_url(model_url, cache_root, verbose=True)
archive_path = audeer.download_url(
model_url, cache_root, verbose=True)
audeer.extract_archive(archive_path, model_root)
cuda = "cuda" if torch.cuda.is_available() else "cpu"
device = self.util.config_val("MODEL", "device", cuda)
Expand Down
8 changes: 5 additions & 3 deletions nkululeko/feat_extract/feats_audmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,19 @@ class AudmodelSet(Featureset):
https://arxiv.org/abs/2203.07378.
"""

def __init__(self, name, data_df):
super().__init__(name, data_df)
def __init__(self, name, data_df, feats_type):
super().__init__(name, data_df, feats_type)
self.model_loaded = False
self.feats_type = feats_type

def _load_model(self):
model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
model_root = self.util.config_val("FEATS", "aud.model", "./audmodel/")
if not os.path.isdir(model_root):
cache_root = audeer.mkdir("cache")
model_root = audeer.mkdir(model_root)
archive_path = audeer.download_url(model_url, cache_root, verbose=True)
archive_path = audeer.download_url(
model_url, cache_root, verbose=True)
audeer.extract_archive(archive_path, model_root)
cuda = "cuda" if torch.cuda.is_available() else "cpu"
device = self.util.config_val("MODEL", "device", cuda)
Expand Down
16 changes: 10 additions & 6 deletions nkululeko/feat_extract/feats_clap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
import audiofile


class Clap(Featureset):
class ClapSet(Featureset):
"""Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""

def __init__(self, name, data_df):
def __init__(self, name, data_df, feats_type):
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
super().__init__(name, data_df)
super().__init__(name, data_df, feats_type)
self.device = self.util.config_val("MODEL", "device", "cpu")
self.model_initialized = False
self.feat_type = feats_type

def init_model(self):
# load model
Expand All @@ -32,12 +33,14 @@ def extract(self):
store = self.util.get_path("store")
store_format = self.util.config_val("FEATS", "store_format", "pkl")
storage = f"{store}{self.name}.{store_format}"
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
if not self.model_initialized:
self.init_model()
self.util.debug("extracting clap embeddings, this might take a while...")
self.util.debug(
"extracting clap embeddings, this might take a while...")
emb_series = pd.Series(index=self.data_df.index, dtype=object)
length = len(self.data_df.index)
for idx, (file, start, end) in enumerate(
Expand All @@ -51,7 +54,8 @@ def extract(self):
)
emb = self.get_embeddings(signal, sampling_rate)
emb_series[idx] = emb
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
self.df = pd.DataFrame(
emb_series.values.tolist(), index=self.data_df.index)
self.util.write_store(self.df, storage, store_format)
try:
glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
Expand Down
5 changes: 3 additions & 2 deletions nkululeko/feat_extract/feats_hubert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# feats_hubert.py
# HuBERT feature extractor for Nkululeko
# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
# "hubert-base-ls960", hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"


import os
Expand All @@ -22,7 +23,7 @@ class Hubert(Featureset):
def __init__(self, name, data_df, feat_type):
"""Constructor. is_train is needed to distinguish from test/dev sets,
because they use the codebook from the training"""
super().__init__(name, data_df)
super().__init__(name, data_df, feat_type)
# check if device is not set, use cuda if available
cuda = "cuda" if torch.cuda.is_available() else "cpu"
self.device = self.util.config_val("MODEL", "device", cuda)
Expand Down
27 changes: 16 additions & 11 deletions nkululeko/feat_extract/feats_oxbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
class Openxbow(Featureset):
"""Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""

def __init__(self, name, data_df, is_train=False):
def __init__(self, name, data_df, feats_type, is_train=False):
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
super().__init__(name, data_df)
super().__init__(name, data_df, feats_type)
self.feats_types = feats_type
self.is_train = is_train

def extract(self):
Expand All @@ -21,11 +22,13 @@ def extract(self):
self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
store = self.util.get_path("store")
storage = f"{store}{self.name}_{self.featset}.pkl"
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
# extract smile features first
self.util.debug("extracting openSmile features, this might take a while...")
self.util.debug(
"extracting openSmile features, this might take a while...")
smile = opensmile.Smile(
feature_set=self.feature_set,
feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
Expand All @@ -48,7 +51,13 @@ def extract(self):
# save the smile features
smile_df.to_csv(lld_name, sep=";", header=False)
# get the path of the xbow java jar file
xbow_path = self.util.config_val("FEATS", "xbow.model", "../openXBOW/")
xbow_path = self.util.config_val(
"FEATS", "xbow.model", "openXBOW")
# check if JAR file exist
if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
# download using wget if not exist and locate in xbow_path
os.system(
f"git clone https://github.com/openXBOW/openXBOW")
# get the size of the codebook
size = self.util.config_val("FEATS", "size", 500)
# get the number of assignements
Expand All @@ -57,16 +66,12 @@ def extract(self):
if self.is_train:
# store the codebook
os.system(
f"java -jar {xbow_path}openXBOW.jar -i"
f" {lld_name} -standardizeInput -log -o"
f" {xbow_name} -size {size} -a {assignments} -B"
f" {codebook_name}"
f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
)
else:
# use the codebook
os.system(
f"java -jar {xbow_path}openXBOW.jar -i {lld_name} "
f" -o {xbow_name} -b {codebook_name}"
f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
)
# read in the result from disk
xbow_df = pd.read_csv(xbow_name, sep=";", header=None)
Expand Down
13 changes: 8 additions & 5 deletions nkululeko/feat_extract/feats_praat.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,20 @@ class PraatSet(Featureset):
"""

def __init__(self, name, data_df):
super().__init__(name, data_df)
def __init__(self, name, data_df, feats_type):
super().__init__(name, data_df, feats_type)

def extract(self):
"""Extract the features based on the initialized dataset or re-open them when found on disk."""
store = self.util.get_path("store")
store_format = self.util.config_val("FEATS", "store_format", "pkl")
storage = f"{store}{self.name}.{store_format}"
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
self.util.debug("extracting Praat features, this might take a while...")
self.util.debug(
"extracting Praat features, this might take a while...")
self.df = feinberg_praat.compute_features(self.data_df.index)
self.df = self.df.set_index(self.data_df.index)
for i, col in enumerate(self.df.columns):
Expand All @@ -52,7 +54,8 @@ def extract(self):
self.df = self.df.astype(float)

def extract_sample(self, signal, sr):
import audiofile, audformat
import audiofile
import audformat

tmp_audio_names = ["praat_audio_tmp.wav"]
audiofile.write(tmp_audio_names[0], signal, sr)
Expand Down
16 changes: 10 additions & 6 deletions nkululeko/feat_extract/feats_trill.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# feats_trill.py
import tensorflow_hub as hub
import os
import tensorflow as tf
from numpy.core.numeric import tensordot
Expand All @@ -11,7 +12,6 @@

# Import TF 2.X and make sure we're running eager.
assert tf.executing_eagerly()
import tensorflow_hub as hub


class TRILLset(Featureset):
Expand All @@ -20,7 +20,7 @@ class TRILLset(Featureset):
"""https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""

# Initialization of the class
def __init__(self, name, data_df):
def __init__(self, name, data_df, feats_type):
"""
Initialize the class with name, data and Util instance
Also loads the model from hub
Expand All @@ -31,28 +31,32 @@ def __init__(self, name, data_df):
:type data_df: DataFrame
:return: None
"""
super().__init__(name, data_df)
super().__init__(name, data_df, feats_type)
# Load the model from the configured path
model_path = self.util.config_val(
"FEATS",
"trill.model",
"https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
)
self.module = hub.load(model_path)
self.feats_type = feats_type

def extract(self):
store = self.util.get_path("store")
storage = f"{store}{self.name}.pkl"
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
self.util.debug("extracting TRILL embeddings, this might take a while...")
self.util.debug(
"extracting TRILL embeddings, this might take a while...")
emb_series = pd.Series(index=self.data_df.index, dtype=object)
length = len(self.data_df.index)
for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
emb = self.getEmbeddings(file)
emb_series[idx] = emb
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
self.df = pd.DataFrame(
emb_series.values.tolist(), index=self.data_df.index)
self.df.to_pickle(storage)
try:
glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
Expand Down
23 changes: 16 additions & 7 deletions nkululeko/feat_extract/feats_wav2vec2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# feats_wav2vec2.py
# feat_types example = wav2vec2-large-robust-ft-swbd-300h
""" feats_wav2vec2.py
feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
Complete list: https://huggingface.co/facebook?search_models=wav2vec2
Currently only supports wav2vec2
"""

import os
from tqdm import tqdm
import pandas as pd
Expand All @@ -16,11 +22,11 @@ class Wav2vec2(Featureset):

def __init__(self, name, data_df, feat_type):
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
super().__init__(name, data_df)
super().__init__(name, data_df, feat_type)
cuda = "cuda" if torch.cuda.is_available() else "cpu"
self.device = self.util.config_val("MODEL", "device", cuda)
self.model_initialized = False
if feat_type == "wav2vec" or feat_type == "wav2vec2":
if feat_type == "wav2vec2":
self.feat_type = "wav2vec2-large-robust-ft-swbd-300h"
else:
self.feat_type = feat_type
Expand All @@ -33,7 +39,8 @@ def init_model(self):
)
config = transformers.AutoConfig.from_pretrained(model_path)
layer_num = config.num_hidden_layers
hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
hidden_layer = int(self.util.config_val(
"FEATS", "wav2vec2.layer", "0"))
config.num_hidden_layers = layer_num - hidden_layer
self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
Expand All @@ -48,7 +55,8 @@ def extract(self):
"""Extract the features or load them from disk if present."""
store = self.util.get_path("store")
storage = f"{store}{self.name}.pkl"
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
if not self.model_initialized:
Expand All @@ -69,7 +77,8 @@ def extract(self):
emb = self.get_embeddings(signal, sampling_rate, file)
emb_series[idx] = emb
# print(f"emb_series shape: {emb_series.shape}")
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
self.df = pd.DataFrame(
emb_series.values.tolist(), index=self.data_df.index)
# print(f"df shape: {self.df.shape}")
self.df.to_pickle(storage)
try:
Expand Down
9 changes: 6 additions & 3 deletions nkululeko/feat_extract/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

class Featureset:
name = "" # designation
df = None # pandas dataframe to store the features (and indexed with the data from the sets)
df = None # pandas dataframe to store the features
# (and indexed with the data from the sets)
data_df = None # dataframe to get audio paths

def __init__(self, name, data_df):
def __init__(self, name, data_df, feats_type):
self.name = name
self.data_df = data_df
self.util = Util("featureset")
self.feats_types = feats_type

def extract(self):
pass
Expand All @@ -23,7 +25,8 @@ def filter(self):
self.df = self.df[self.df.index.isin(self.data_df.index)]
try:
# use only some features
selected_features = ast.literal_eval(glob_conf.config["FEATS"]["features"])
selected_features = ast.literal_eval(
glob_conf.config["FEATS"]["features"])
self.util.debug(f"selecting features: {selected_features}")
sel_feats_df = pd.DataFrame()
hit = False
Expand Down
Loading

0 comments on commit 5c03a72

Please sign in to comment.