# DB_reader.py
"""
Modification of the module 'DBspeech_wav_reader.py' from the deep-speaker project by philipperemy.
Works on Python 3.
Input  : DB path
Output : 1) Build the DB structure as a pd.DataFrame with 3 columns (filename, label_path, dataset_id)
            => 'read_DB_structure' function
         2) Read a wav file
            => 'read_audio' function
"""
import logging
import os
import sys
from glob import glob
#import glob2 # for python2

import librosa
import numpy as np
import pandas as pd

import configure as c
from configure import SAMPLE_RATE
# Print numpy arrays in full instead of summarising them with "...".
# np.nan is rejected as a threshold by current numpy (ValueError at import
# time); sys.maxsize is the value numpy documents for untruncated output.
np.set_printoptions(threshold=sys.maxsize)
# Widen pandas' display limits so logged DataFrames are not cut off.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('max_colwidth', 100)
def find_wavs(directory, pattern='**/*.wav'):
    """Recursively find all files under ``directory`` that match ``pattern``.

    Args:
        directory: Root folder to search.
        pattern: Glob pattern, relative to ``directory``. With the recursive
            search enabled, ``**`` matches any number of nested folders.

    Returns:
        Sorted list of matching paths (empty when nothing matches).
    """
    # glob yields entries in arbitrary, filesystem-dependent order; sorting
    # makes the file list reproducible across runs and machines.
    return sorted(glob(os.path.join(directory, pattern), recursive=True))
    # Python 2 alternative: glob2.glob(os.path.join(directory, pattern))
def find_feats(directory, pattern='**/*.p'):
    """Recursively find all feature files under ``directory``.

    Args:
        directory: Root folder to search.
        pattern: Glob pattern, relative to ``directory``. With the recursive
            search enabled, ``**`` matches any number of nested folders.

    Returns:
        Sorted list of matching paths (empty when nothing matches).
    """
    # glob yields entries in arbitrary, filesystem-dependent order; sorting
    # keeps the row order of the DataFrames built from this list stable.
    return sorted(glob(os.path.join(directory, pattern), recursive=True))
    # Python 2 alternative: glob2.glob(os.path.join(directory, pattern))
def read_audio(filename, sample_rate=SAMPLE_RATE):
    """Load an audio file and return its samples as a flat array.

    Args:
        filename: Path handed to ``librosa.load``.
        sample_rate: Value passed to librosa as ``sr``; defaults to the
            module-level ``SAMPLE_RATE`` imported from ``configure``.

    Returns:
        The flattened sample array produced by ``librosa.load`` (mono).
    """
    # librosa.load returns a (samples, rate) pair; the rate is not used here.
    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    return samples.flatten()
def convert_filename_to_labelpath(filename):
    """Map a feature-file path to the path of its label (.mat) file.

    The data split is detected by substring search over the whole path. The
    last two '/'-separated components (speaker folder and file name) are then
    re-rooted under ``c.LABEL_DIR/<split>`` and a trailing '.p' extension is
    swapped for '.mat'.
    ex) .../train/322F3065/SNR322F3MIC065188.p
        -> <LABEL_DIR>/train/322F3065/SNR322F3MIC065188.mat

    Args:
        filename: '/'-separated path of a feature file.

    Returns:
        Path of the corresponding label file.

    Raises:
        ValueError: If the path contains neither 'train' nor 'test'.
    """
    # 'test' is checked first on purpose: when both words occur in the path,
    # the previous implementation let the 'test' branch overwrite 'train'.
    if 'test' in filename:
        split = 'test'
    elif 'train' in filename:
        split = 'train'
    else:
        raise ValueError(
            "Cannot derive label path: neither 'train' nor 'test' found in "
            "{!r}".format(filename))

    spk_and_feat_name = '/'.join(filename.split('/')[-2:])
    # Swap only the file extension; a blanket str.replace would also rewrite
    # any other '.p' inside the speaker folder or file name.
    if spk_and_feat_name.endswith('.p'):
        spk_and_feat_name = spk_and_feat_name[:-len('.p')] + '.mat'
    return os.path.join(c.LABEL_DIR, split, spk_and_feat_name)
def read_DB_structure(directory):
    """Build a DataFrame describing every feature file under ``directory``.

    Columns:
        filename   -- feature-file path with backslashes turned into '/'
        label_path -- result of convert_filename_to_labelpath(filename)
        dataset_id -- third-from-last '/'-separated component of filename

    The first 10 rows are logged at INFO level before returning.
    """
    def _to_forward_slashes(path):
        # Normalize windows paths.
        return path.replace('\\', '/')

    def _dataset_folder(path):
        # Dataset folder name sits two levels above the file.
        return path.split('/')[-3]

    DB = pd.DataFrame()
    DB['filename'] = find_feats(directory)
    DB['filename'] = DB['filename'].apply(_to_forward_slashes)
    DB['label_path'] = DB['filename'].apply(convert_filename_to_labelpath)
    DB['dataset_id'] = DB['filename'].apply(_dataset_folder)

    logging.info(DB.head(10))
    return DB
def read_feats_structure(directory):
    """Build a DataFrame of feature files with speaker and dataset ids.

    Columns:
        filename   -- feature-file path with backslashes turned into '/'
        speaker_id -- second-from-last '/'-separated component (speaker folder)
        dataset_id -- third-from-last '/'-separated component (dataset folder)

    Logs the file count, the number of distinct speakers and the first
    10 rows at INFO level before returning.
    """
    def _to_forward_slashes(path):
        # Normalize windows paths.
        return path.replace('\\', '/')

    def _speaker_folder(path):
        return path.split('/')[-2]

    def _dataset_folder(path):
        return path.split('/')[-3]

    DB = pd.DataFrame()
    DB['filename'] = find_feats(directory)
    DB['filename'] = DB['filename'].apply(_to_forward_slashes)
    DB['speaker_id'] = DB['filename'].apply(_speaker_folder)
    DB['dataset_id'] = DB['filename'].apply(_dataset_folder)

    # Zero-padded counts keep the log lines aligned.
    file_count = str(len(DB)).zfill(7)
    speaker_count = str(len(DB['speaker_id'].unique())).zfill(5)
    logging.info('Found {} files with {} different speakers.'.format(file_count, speaker_count))
    logging.info(DB.head(10))
    return DB
def test(DB_dir='/home/administrator/Desktop/DB/Speaker_robot_train_DB'):
    """Smoke test: build the DB structure and load its first listed file.

    Args:
        DB_dir: Root folder of the DB. Defaults to the path that used to be
            hard-coded, so calling ``test()`` without arguments is unchanged.

    Returns:
        Tuple ``(DB, test_wav)``: the DataFrame from ``read_DB_structure`` and
        the array returned by ``read_audio`` for the first row's filename.

    Raises:
        IndexError: If no file was found under ``DB_dir``.
    """
    DB = read_DB_structure(DB_dir)
    if DB.empty:
        # Same exception type an empty DB produced before, with a clear message.
        raise IndexError('No feature files found under {}'.format(DB_dir))
    # NOTE(review): read_DB_structure lists '*.p' feature files (via
    # find_feats), yet the first entry is passed to read_audio / librosa.load.
    # Confirm whether a wav path from find_wavs was intended here.
    test_wav = read_audio(DB['filename'].iloc[0])
    return DB, test_wav
if __name__ == "__main__":
    # Run the smoke test when the module is executed as a script; the results
    # stay available as module globals for interactive inspection.
    DB, test_wav = test()