From 4c4b1ecce3cf15ddde2aac68359101db2a8ef651 Mon Sep 17 00:00:00 2001 From: Bias Date: Fri, 3 Jul 2020 15:58:37 +0200 Subject: [PATCH] Re-enabled multiprocess transcribing, as I'm not sure whether it's only my env that OOMs --- align/align.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/align/align.py b/align/align.py index c9ef3ce..0157cd1 100644 --- a/align/align.py +++ b/align/align.py @@ -485,15 +485,10 @@ def pre_filter(): samples = list(progress(pre_filter(), desc='VAD splitting')) - # TODO: OOM - #pool = multiprocessing.Pool(initializer=init_stt, - # initargs=(output_graph_path, scorer_path), - # processes=args.stt_workers) - #transcripts = list(progress(pool.imap(stt, samples), desc='Transcribing', total=len(samples))) - transcripts = [] - init_stt(output_graph_path, scorer_path) - for sample in samples: - transcripts.append(stt(sample)) + pool = multiprocessing.Pool(initializer=init_stt, + initargs=(output_graph_path, scorer_path), + processes=args.stt_workers) + transcripts = list(progress(pool.imap(stt, samples), desc='Transcribing', total=len(samples))) fragments = [] for time_start, time_end, segment_transcript in transcripts: