Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallel forced alignment #319

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 116 additions & 65 deletions misc/scripts/alignment/state_align/forced_alignment.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os, sys
import time
import random
import glob

from sys import argv, stderr
from subprocess import check_call, Popen, CalledProcessError, PIPE
from mean_variance_norm import MeanVarianceNorm

# string constants for various shell calls
STATE_NUM=5
STATE_NUM = 5
F = str(0.01)
SFAC = str(5.0)
PRUNING = [str(i) for i in (250., 150., 2000.)]
Expand All @@ -15,31 +17,31 @@
HMMDEFS = 'hmmdefs'
VFLOORS = 'vFloors'

##
HTKDIR = path/to/tools/htk
HCompV = os.path.join(HTKDIR, 'HCompV')
HCopy = os.path.join(HTKDIR, 'HCopy' )
HERest = os.path.join(HTKDIR, 'HERest')
HHEd = os.path.join(HTKDIR, 'HHEd' )
HVite = os.path.join(HTKDIR, 'HVite' )

class ForcedAlignment(object):

def __init__(self):
def __init__(self, htk_dir):
self.proto = None
self.phoneme_mlf = None
self.set_htk_dir(htk_dir)

def set_htk_dir(self, htk_dir):
    """Record the paths of the HTK command-line tools used by the aligner.

    Sets the attributes ``HCompV``, ``HCopy``, ``HERest``, ``HHEd`` and
    ``HVite`` on the instance, each being the tool name joined onto
    ``htk_dir``.

    Args:
        htk_dir: directory that contains the HTK executables. If it is
            None or empty, the bare tool names are stored instead, so
            the executables are resolved through PATH when they are
            launched with subprocess.
    """
    # os.path.join(None, ...) raises, and the script entry point hands
    # over None when no HTK directory was supplied; an empty prefix
    # makes os.path.join return just the tool name.
    tool_dir = htk_dir or ''
    for tool in ('HCompV', 'HCopy', 'HERest', 'HHEd', 'HVite'):
        setattr(self, tool, os.path.join(tool_dir, tool))

def _make_proto(self):
## make proto
fid = open(self.proto, 'w')
means = ' '.join(['0.0' for _ in range(39)])
varg = ' '.join(['1.0' for _ in range(39)])
varg = ' '.join(['1.0' for _ in range(39)])
fid.write("""~o <VECSIZE> 39 <USER>
~h "proto"
<BEGINHMM>
<NUMSTATES> 7
""")
for i in range(2, STATE_NUM+2):
<NUMSTATES> 7""")
for i in range(2, STATE_NUM + 2):
fid.write('<STATE> {0}\n<MEAN> 39\n{1}\n'.format(i, means))
fid.write('<VARIANCE> 39\n{0}\n'.format(varg))
fid.write("""<TRANSP> 7
Expand All @@ -55,20 +57,20 @@ def _make_proto(self):
fid.close()

## make vFloors
check_call([HCompV, '-f', F, '-C', self.cfg,
'-S', self.train_scp,
'-M', self.cur_dir, self.proto])
check_call([self.HCompV, '-f', F, '-C', self.cfg,
'-S', self.train_scp,
'-M', self.cur_dir, self.proto])
## make local macro
# get first three lines from local proto
fid = open(os.path.join(self.cur_dir, MACROS), 'w')
source = open(os.path.join(self.cur_dir,
os.path.split(self.proto)[1]), 'r')
os.path.split(self.proto)[1]), 'r')
for _ in range(3):
fid.write(source.readline())
source.close()
# get remaining lines from vFloors
fid.writelines(open(os.path.join(self.cur_dir,
VFLOORS), 'r').readlines())
VFLOORS), 'r').readlines())
fid.close()
## make hmmdefs
fid = open(os.path.join(self.cur_dir, HMMDEFS), 'w')
Expand All @@ -95,7 +97,7 @@ def _read_file_list(self, file_name):
file_lists.append(line)
fid.close()

return file_lists
return file_lists

def _full_to_mono(self, full_file_name, mono_file_name, phoneme_dict):
fre = open(full_file_name, 'r')
Expand Down Expand Up @@ -137,7 +139,6 @@ def _check_data(self, file_id_list, multiple_speaker):
copy_scp.write('{0} {1}\n'.format(wav_file, mfc_file))
check_scp.write('{0}\n'.format(mfc_file))


if multiple_speaker:
tmp_list = file_id.split('/')
speaker_name = tmp_list[0]
Expand All @@ -149,7 +150,6 @@ def _check_data(self, file_id_list, multiple_speaker):
speaker_utt_dict['only_one'] = []
speaker_utt_dict['only_one'].append(mfc_file)


self._full_to_mono(lab_file, mono_lab_file, phoneme_dict)
copy_scp.close()
check_scp.close()
Expand All @@ -168,7 +168,7 @@ def _check_data(self, file_id_list, multiple_speaker):
fid.write('"*/*.lab" -> "' + self.mono_lab_dir + '"\n')
fid.close()

return speaker_utt_dict
return speaker_utt_dict

def _HCopy(self):
"""
Expand All @@ -187,7 +187,7 @@ def _HCopy(self):
NUMCHANS = 20
NUMCEPS = 12
""")
check_call([HCopy, '-C', self.cfg, '-S', self.copy_scp])
check_call([self.HCopy, '-C', self.cfg, '-S', self.copy_scp])
# write a CFG for what we just built
open(self.cfg, 'w').write("""TARGETRATE = 50000.0
TARGETKIND = USER
Expand Down Expand Up @@ -215,7 +215,7 @@ def _nxt_dir(self):

def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir, multiple_speaker):

print('---preparing enverionment')
print '---preparing enverionment'
self.cfg_dir = os.path.join(work_dir, 'config')
self.model_dir = os.path.join(work_dir, 'model')
self.cur_dir = os.path.join(self.model_dir, 'hmm0')
Expand All @@ -235,7 +235,7 @@ def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir, multip
# CFG
self.cfg = os.path.join(self.cfg_dir, 'cfg')

self.wav_dir=wav_dir
self.wav_dir = wav_dir
self.lab_dir = lab_dir
self.mfc_dir = os.path.join(work_dir, 'mfc')
if not os.path.exists(self.mfc_dir):
Expand All @@ -246,43 +246,77 @@ def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir, multip
os.makedirs(self.mono_lab_dir)

file_id_list = self._read_file_list(file_id_list_name)
print('---checking data')
print ('---checking data')
speaker_utt_dict = self._check_data(file_id_list, multiple_speaker)

print('---extracting features')
print ('---extracting features')
self._HCopy()
print(time.strftime("%c"))
print (time.strftime("%c"))

print('---feature_normalisation')
normaliser = MeanVarianceNorm(39)
for key_name in list(speaker_utt_dict.keys()):
print ('---feature_normalisation')
for key_name in speaker_utt_dict.keys():
normaliser = MeanVarianceNorm(39)
normaliser.feature_normalisation(speaker_utt_dict[key_name], speaker_utt_dict[key_name]) ## save to itself
print(time.strftime("%c"))
print (time.strftime("%c"))

print('---making proto')
print ('---making proto')
self._make_proto()

def train_hmm(self, niter, num_mix):
"""
Perform one or more rounds of estimation
"""

print(time.strftime("%c"))
print('---training HMM models')
print (time.strftime("%c"))
print ('---training HMM models')

# call HErest in multiple chunks
# split scp in num_splits chunks and save them
num_splits = int(os.getenv('DNN_NUM_PARALLEL', 8))
print ("----num_splits set to %s" % num_splits)
train_scp_chunks = []
with open(self.train_scp, "rt") as fp:
mfc_files = fp.readlines()
random.shuffle(mfc_files)
n = (len(mfc_files) + 1) / num_splits
mfc_chunks = [mfc_files[j:j + n] for j in xrange(0, len(mfc_files), n)]
for i in range(len(mfc_chunks)):
train_scp_chunks.append(os.path.join(self.cfg_dir, "train_%d.scp" % i))
with open(train_scp_chunks[i], "wt") as fp:
fp.writelines(mfc_chunks[i])

done = 0
mix = 1
while mix <= num_mix and done == 0:
for i in range(niter):
next_dir = os.path.join(self.model_dir, 'hmm_mix_' + str(mix) + '_iter_' + str(i+1))
next_dir = os.path.join(self.model_dir, 'hmm_mix_' + str(mix) + '_iter_' + str(i + 1))
if not os.path.exists(next_dir):
os.makedirs(next_dir)
check_call([HERest, '-C', self.cfg, '-S', self.train_scp,
'-I', self.phoneme_mlf,

procs = []
# estimate per chunk
for chunk_num in range(len(train_scp_chunks)):
procs.append(Popen([self.HERest, '-C', self.cfg,
'-S', train_scp_chunks[chunk_num],
'-I', self.phoneme_mlf,
'-M', next_dir,
'-H', os.path.join(self.cur_dir, MACROS),
'-H', os.path.join(self.cur_dir, HMMDEFS),
'-t'] + PRUNING + ['-p', str(chunk_num + 1), self.phonemes],
stdout=PIPE))

# wait until all HERest calls are finished
for p in procs:
p.wait()

# now accumulate
check_call([self.HERest, '-C', self.cfg,
'-M', next_dir,
'-H', os.path.join(self.cur_dir, MACROS),
'-H', os.path.join(self.cur_dir, HMMDEFS),
'-t'] + PRUNING + [self.phonemes],
'-t'] + PRUNING + ['-p', '0', self.phonemes] + glob.glob(next_dir + os.sep + "*.acc"),
stdout=PIPE)

self.cur_dir = next_dir

if mix * 2 <= num_mix:
Expand All @@ -296,10 +330,10 @@ def train_hmm(self, niter, num_mix):
if not os.path.exists(next_dir):
os.makedirs(next_dir)

check_call( [HHEd, '-A',
'-H', os.path.join(self.cur_dir, MACROS),
'-H', os.path.join(self.cur_dir, HMMDEFS),
'-M', next_dir] + [hed_file] + [self.phonemes])
check_call([self.HHEd, '-A',
'-H', os.path.join(self.cur_dir, MACROS),
'-H', os.path.join(self.cur_dir, HMMDEFS),
'-M', next_dir] + [hed_file] + [self.phonemes])

self.cur_dir = next_dir
mix = mix * 2
Expand All @@ -310,11 +344,11 @@ def align(self, work_dir, lab_align_dir):
"""
Align using the models in self.cur_dir and MLF to path
"""
print('---aligning data')
print(time.strftime("%c"))
print ('---aligning data')
print (time.strftime("%c"))
self.align_mlf = os.path.join(work_dir, 'mono_align.mlf')

check_call([HVite, '-a', '-f', '-m', '-y', 'lab', '-o', 'SM',
check_call([self.HVite, '-a', '-f', '-m', '-y', 'lab', '-o', 'NM',
'-i', self.align_mlf, '-L', self.mono_lab_dir,
'-C', self.cfg, '-S', self.train_scp,
'-H', os.path.join(self.cur_dir, MACROS),
Expand All @@ -328,6 +362,7 @@ def _postprocess(self, mlf, lab_align_dir):
if not os.path.exists(lab_align_dir):
os.makedirs(lab_align_dir)

fstats = open("logprob.txt", "wt")
state_num = STATE_NUM
fid = open(mlf, 'r')
line = fid.readline()
Expand All @@ -339,45 +374,61 @@ def _postprocess(self, mlf, lab_align_dir):
line = line.replace('"', '')
file_base = os.path.basename(line)
flab = open(os.path.join(self.lab_dir, file_base), 'r')
fw = open(os.path.join(lab_align_dir, file_base), 'w')
fw = open(os.path.join(lab_align_dir, file_base), 'w')
lab_logprob = 0.0
lab_entries = 0
for full_lab in flab.readlines():
full_lab = full_lab.strip()
for i in range(state_num):
line = fid.readline()
line = line.strip()
tmp_list = line.split()
fw.write('{0} {1} {2}[{3}]\n'.format(tmp_list[0], tmp_list[1], full_lab, i+2))

fw.write('{0} {1} {2}[{3}]\n'.format(tmp_list[0], tmp_list[1], full_lab, i + 2))
lab_logprob += float(tmp_list[3])
lab_entries += 1
fw.close()
flab.close()
fstats.write(file_base + " " + str(lab_entries) + " " + str(lab_logprob / lab_entries))
line = fid.readline()
line = line.strip()
if line != '.':
print('The two files are not matched!\n')
print ('The two files are not matched!\n')
sys.exit(1)
fid.close()
fstats.close()


if __name__ == '__main__':

work_dir = os.getcwd()

wav_dir = os.path.join(work_dir, 'slt_wav')
lab_dir = os.path.join(work_dir, 'label_no_align')
lab_align_dir = os.path.join(work_dir, 'label_state_align')

file_id_list_name = os.path.join(work_dir, 'file_id_list.scp')

## if multiple_speaker is turned on, the file_id_list.scp has to reflect this
## for example
## speaker_1/0001
## speaker_2/0001
## This is to do speaker-dependent normalisation
multiple_speaker = False

aligner = ForcedAlignment()
aligner.prepare_training(file_id_list_name, wav_dir, lab_dir, work_dir, multiple_speaker)

if '-a' in sys.argv:
sys.argv.remove('-a')
multiple_speaker = True

# hack to keep old run_aligner scripts (which use sed replace) compatible:
# they rewrite the "HTKDIR =" and "work_dir =" assignments
if (len(sys.argv)) > 1:
work_real_dir = sys.argv[1]
else:
work_dir = os.getcwd()
work_real_dir = work_dir
if len(sys.argv) > 2:
htk_dir = sys.argv[2]
else:
HTKDIR = None
htk_dir = HTKDIR

wav_dir = os.path.join(work_real_dir, 'slt_wav')
lab_dir = os.path.join(work_real_dir, 'label_no_align')
lab_align_dir = os.path.join(work_real_dir, 'label_state_align')
file_id_list_name = os.path.join(work_real_dir, 'file_id_list.scp')

aligner = ForcedAlignment(htk_dir)
aligner.prepare_training(file_id_list_name, wav_dir, lab_dir, work_real_dir, multiple_speaker)
aligner.train_hmm(7, 32)
aligner.align(work_dir, lab_align_dir)
print('---done!')
aligner.align(work_real_dir, lab_align_dir)
print ('---done!')