7
7
8
8
class SoundComparison :
9
9
10
- def compare_waves (self , speaker_sound , correct_sound , location_to_save ):
10
def __init__(self):
    """Create the container that collects the findings of a comparison.

    ``self.result`` holds two boolean flags (initially ``False``) and four
    lists (initially empty); ``check_sensibility_of_breaks`` fills them in.
    """
    flag_keys = ("too_little_breaks", "too_many_breaks")
    list_keys = ("short_breaks", "long_breaks",
                 "short_pronunciation", "long_pronunciation")

    findings = {key: False for key in flag_keys}
    # Every list key gets its own fresh list so findings are never shared.
    findings.update((key, []) for key in list_keys)
    self.result = findings
14
+
15
+ def compare_waves (self , speaker_sound , correct_sound , location ):
11
16
speaker_rate , pre_speaker_data = scipy .io .wavfile .read (speaker_sound )
12
17
correct_rate , correct_data = scipy .io .wavfile .read (correct_sound )
13
18
speaker_data = self .stereo_to_mono (pre_speaker_data )
@@ -24,14 +29,14 @@ def compare_waves(self, speaker_sound, correct_sound, location_to_save):
24
29
speaker_time = np .arange (0 , len (speaker_data ), 1 ) / speaker_rate
25
30
correct_time = np .arange (0 , len (correct_data ), 1 ) / correct_rate
26
31
27
- if self .check_for_long_breaks ( speaker_data , speaker_rate ) is not None :
28
- self .plot_graphs ( correct_time , correct_data , speaker_time , speaker_data , "#5cb85c" , location_to_save )
29
- return False
30
- self .plot_graphs (correct_time , correct_data , speaker_time , speaker_data , "#5cb85c" , location_to_save )
31
- return self . check_sensibility_of_breaks ( speaker_silence , correct_silence )
32
- # if self.check_for_amplitude_inconsistencies(speaker_data, speaker_rate, correct_data, correct_rate) is not None:
33
- # return False
34
- # return True
32
+ self .check_sensibility_of_breaks ( speaker_silence , correct_silence )
33
+ long_breaks = self .result [ "long_breaks" ]
34
+ if ( len ( long_breaks ) == 0 ):
35
+ self .plot_graphs (correct_time , correct_data , speaker_time , speaker_data , "#5cb85c" , location )
36
+ else :
37
+ self .plot_graphs ( correct_time , correct_data , speaker_time , speaker_data , "#f0ad4e" , location )
38
+ print ( long_breaks )
39
+ return long_breaks
35
40
36
41
def plot_graphs (self , correct_time , correct_data , speaker_time , speaker_data , color , location ):
37
42
# plot amplitude (or loudness) over time
@@ -83,8 +88,10 @@ def find_audio_chunk_breaks(self, audio_data, rate, silence_amplitude):
83
88
def check_sensibility_of_breaks(self, speaker_breaks, correct_breaks):
    """Compare the speaker's breaks with the reference breaks.

    Findings are written into ``self.result``; nothing is returned.

    Args:
        speaker_breaks: sequence of ``(start, end)`` pairs found in the
            speaker's recording.
        correct_breaks: sequence of ``(start, end)`` pairs found in the
            reference recording.
        NOTE(review): the thresholds below presumably are seconds -- confirm
        against ``find_audio_chunk_breaks``.

    Effects on ``self.result``:
        * ``too_many_breaks`` / ``too_little_breaks`` is set to ``True`` when
          the speaker has more / fewer breaks than the reference. In that
          case the breaks cannot be paired up and no further check is done.
        * ``long_breaks`` / ``short_breaks`` receive the speaker break when
          it is longer than the absolute limit, or differs from the
          reference break by more than the tolerance.
        * ``long_pronunciation`` / ``short_pronunciation`` receive the
          ``(start, end)`` of the spoken segment before a break that starts
          noticeably later / earlier than the reference break.
    """
    max_break_length = 2.2    # a break longer than this is always "long"
    length_tolerance = 0.30   # allowed difference in break length
    start_tolerance = 0.5     # allowed drift of a break's start

    if len(speaker_breaks) > len(correct_breaks):
        self.result["too_many_breaks"] = True
        return
    if len(speaker_breaks) < len(correct_breaks):
        # Fixed: this branch used to store False, so the flag never fired.
        self.result["too_little_breaks"] = True
        return

    # The end-of-break offset of the previous pair is added to the start
    # tolerance so that an earlier deviation is not reported again.
    last_time_difference = 0
    # End of the previous speaker break, i.e. start of the current spoken
    # segment. Fixed: the first segment used speaker_breaks[-1] because of
    # negative-index wraparound.
    # NOTE(review): assumes the recording's timeline starts at 0 -- confirm.
    previous_speaker_end = 0

    for speaker_break, correct_break in zip(speaker_breaks, correct_breaks):
        speaker_start, speaker_end = speaker_break[0], speaker_break[1]
        speaker_break_time = speaker_end - speaker_start
        correct_start, correct_end = correct_break[0], correct_break[1]
        correct_break_time = correct_end - correct_start

        if speaker_break_time > max_break_length:
            self.result["long_breaks"].append(speaker_break)
        elif (speaker_break_time - correct_break_time) > length_tolerance:
            self.result["long_breaks"].append(speaker_break)
        elif (correct_break_time - speaker_break_time) > length_tolerance:
            self.result["short_breaks"].append(speaker_break)

        allowed_drift = last_time_difference + start_tolerance
        spoken_segment = (previous_speaker_end, speaker_start)
        if (speaker_start - correct_start) > allowed_drift:
            self.result["long_pronunciation"].append(spoken_segment)
        elif (correct_start - speaker_start) > allowed_drift:
            self.result["short_pronunciation"].append(spoken_segment)

        last_time_difference = abs(correct_end - speaker_end)
        previous_speaker_end = speaker_end
102
114
103
115
def remove_audio_wave_silence (self , audio_data , rate , min = None ):
104
116
start_counter = 0
@@ -114,18 +126,18 @@ def calculate_silent_amplitude(self, audio_data, rate, min=None):
114
126
end = int (rate / 3 )
115
127
return max (max (audio_data [- end :]), 0.15 ) if min is None else 0.1
116
128
117
- def check_for_long_breaks (self , audio_data , rate ):
118
- counter = 0
119
- duration = 0
120
- while counter < len (audio_data ):
121
- if audio_data [counter ] < self .calculate_silent_amplitude (audio_data , rate ):
122
- duration += 1
123
- if duration > (2.2 * rate ):
124
- return counter / rate
125
- else :
126
- duration = 0
127
- counter += 1
128
- return None
129
+ # def check_for_long_breaks(self, audio_data, rate):
130
+ # counter = 0
131
+ # duration = 0
132
+ # while counter < len(audio_data):
133
+ # if audio_data[counter] < self.calculate_silent_amplitude(audio_data, rate):
134
+ # duration += 1
135
+ # if duration > (2.2 * rate):
136
+ # return counter / rate
137
+ # else:
138
+ # duration = 0
139
+ # counter += 1
140
+ # return None
129
141
130
142
# def check_for_amplitude_inconsistencies(self, audio_data1, rate1, audio_data2, rate2):
131
143
# chunk_audio1 = self.convert_audio_data_to_chunk_audio_data(audio_data1, rate1)
@@ -145,9 +157,6 @@ def convert_audio_data_to_chunk_audio_data(self, audio_data, rate):
145
157
chunk_audio_data [i ] = chunk_audio_data [i ] / (int (rate ) / 10 )
146
158
return chunk_audio_data , int (rate / 10 )
147
159
148
- def return_breaks (correct_loc , user_loc , location_to_save ):
149
- print (SoundComparison ().compare_waves (user_loc , correct_loc , location_to_save ))
150
-
151
160
if __name__ == "__main__" :
152
161
# web_file="C:\Users\Samuel\PycharmProjects\speech_analysis\wave_comparison"
153
162
#
@@ -173,5 +182,4 @@ def return_breaks(correct_loc, user_loc, location_to_save):
173
182
# string += str(abs(happy[i] / max))
174
183
# string += "|"
175
184
# print(happy[10400:15000])
176
-
177
- print (SoundComparison ().compare_waves ("D:/Haverford/LocalHack/speech_analysis/reconnect_app/static/Sounds/input_soundd99ec5ce7675.wav" , "D:/Haverford/LocalHack/speech_analysis/reconnect_app/static/Sounds/correct_sound145255b4ec90.wav" , "plots.png" ))
185
+ print (SoundComparison ().compare_waves ("D:/Haverford/LocalHack/speech_analysis/audio_samples/no_pause1.wav" , "D:/Haverford/LocalHack/speech_analysis/reconnect_app/static/Sounds/correct_sound09d546aba5a4.wav" , "plots.png" ))
0 commit comments