-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsegmentation.py
37 lines (31 loc) · 1.2 KB
/
segmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import whisper
class Segmenting:
    """Base container for a list of segments.

    ``segments`` starts out empty; code that populates it (for example a
    subclass ``transcribe`` method) assigns a list to ``self.segments``.
    """

    def __init__(self):
        # Populated later; an empty list means nothing has been loaded yet.
        self.segments = []

    def get_segments(self, proportion_coeff: float) -> list:
        """Return the leading portion of ``self.segments``.

        Args:
            proportion_coeff: Fraction of the segments to return. The
                number of segments kept is
                ``int(proportion_coeff * len(self.segments))`` (truncated
                toward zero). Values of 1 or more return every segment;
                values of 0 or less return an empty list.

        Returns:
            A new list holding the first ``subset_size`` segments.
        """
        # Clamp at zero: a negative size would otherwise be interpreted by
        # the slice as "everything except the last k items".
        subset_size = max(0, int(proportion_coeff * len(self.segments)))
        return self.segments[:subset_size]
class AudioSegmenting(Segmenting):
    """Segment source backed by a Whisper model.

    The constructor loads the ``"base"`` Whisper model and calls
    ``.to(device)`` on it; ``transcribe`` then fills ``self.segments`` from
    the model's transcription result.
    """

    def __init__(self, device):
        """Load the model for ``device`` and initialise the segment list.

        Args:
            device: Passed straight to the loaded model's ``.to(...)``.
        """
        base_model = whisper.load_model("base")
        self.model = base_model.to(device)
        super().__init__()

    def transcribe(self, audio_path):
        """Transcribe ``audio_path`` and store the resulting segments.

        The ``'segments'`` entry of the model's result replaces
        ``self.segments``; nothing is returned.

        Args:
            audio_path: Forwarded unchanged to ``self.model.transcribe``.
        """
        transcription = self.model.transcribe(audio_path)
        self.segments = transcription['segments']
class FileSegmenting(Segmenting):
    """Provides segments for a given RTTM file.

    Each non-blank line of the file is split on whitespace; field 3 is read
    as the start time, field 4 as the duration and field 7 as the speaker
    label (0-based indices).
    """

    def __init__(self, rttm_path):
        """Remember the RTTM file location and initialise the segment list.

        Args:
            rttm_path: Path handed to ``open`` when ``transcribe`` runs.
        """
        self.rttm_path = rttm_path
        super().__init__()

    def transcribe(self):
        """Parse the RTTM file and store one segment dict per record.

        Every stored segment has the keys ``"start"``, ``"end"``
        (``start + duration``), ``"text"`` (always an empty string) and
        ``"speaker"``. The parsed list replaces ``self.segments``; nothing
        is returned.

        Raises:
            OSError: If the file cannot be opened.
            IndexError: If a non-blank line has fewer than 8 fields.
            ValueError: If the start or duration field is not a number.
        """
        labeled_segments = []
        with open(self.rttm_path, "r") as file:
            for line in file:
                parts = line.split()
                # Blank / whitespace-only lines carry no record; skipping
                # them avoids an IndexError on the field lookups below.
                if not parts:
                    continue
                start = float(parts[3])
                duration = float(parts[4])
                labeled_segments.append({
                    "start": start,
                    "end": start + duration,
                    "text": "",
                    "speaker": parts[7],
                })
        self.segments = labeled_segments