forked from tracek/Ornithokrites
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecordings_io.py
152 lines (125 loc) · 4.66 KB
/
recordings_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 1 21:33:27 2013
@author: Lukasz Tracewski
Module for handling recordings' input and output.
"""
from __future__ import division
import os
import logging
import Tkinter, tkFileDialog
import numpy as np
import nose.tools as nt
import scipy.io.wavfile as wav
import s3connection
def get_data(bucket, data_store):
    """
    Optionally fetch recordings from a bucket and resolve the data location.

    Parameters
    ----------
    bucket : string
        Bucket name handed to ``s3connection.read_data``. When falsy
        (empty string or None) no download is attempted.
    data_store : string
        Location of the recordings. It is also passed to
        ``s3connection.read_data`` as the output directory. When falsy the
        user is asked to pick a file through a Tk file dialog.

    Returns
    -------
    data_store : string
        The location given by the caller or, if none was given, the path
        chosen in the dialog (an empty string when the dialog is cancelled).
    """
    if bucket:
        s3connection.read_data(bucket_name=bucket, output_recordings_dir=data_store)
    if not data_store:
        # A Tk root window is required by the dialog; withdraw() keeps the
        # empty root window from being shown next to it.
        root = Tkinter.Tk()
        root.withdraw()
        data_store = tkFileDialog.askopenfilename()
    # Previously the resolved location was computed and then thrown away,
    # leaving the caller with None; hand it back instead.
    return data_store
def get_recordings_walker(data_store, bucket):
    """
    Build a Walker over the recordings location.

    Parameters
    ----------
    data_store : string
        Location of the recordings. When falsy, a Tk file dialog is shown
        and the chosen path is used instead.
    bucket : string
        Bucket name handed to ``s3connection.read_data`` together with
        ``data_store``. When falsy no download is attempted.

    Returns
    -------
    walker : Walker
        Walker created from the resolved location.
    """
    if bucket:
        s3connection.read_data(output_recordings_dir=data_store, bucket_name=bucket)

    location = data_store
    if not location:
        # The dialog needs a Tk root; it is withdrawn so only the dialog shows.
        hidden_root = Tkinter.Tk()
        hidden_root.withdraw()
        location = tkFileDialog.askopenfilename()

    return Walker(location)
def read(path):
    """
    Read wave file from the given path and normalize it.

    Parameters
    ----------
    path : string
        Path to the wave file.

    Returns
    -------
    rate : int
        Sample rate as reported by ``scipy.io.wavfile.read``.
    sample : array of float32
        Audio data divided by its largest absolute value, taken along
        axis 0 (i.e. separately per column for 2-d data). Data that is
        entirely zero is returned as zeros rather than NaN, and an empty
        recording is returned unchanged.
    """
    (rate, sample) = wav.read(path)
    sample = sample.astype('float32')
    if sample.size == 0:
        # np.max raises on a zero-size array; nothing to normalize anyway.
        return rate, sample
    peak = np.max(np.abs(sample), axis=0)
    silent = peak == 0
    if np.any(silent):
        # Dividing by a zero peak would turn silence into NaN (0 / 0).
        # Use 1 as the divisor there so silent data stays exactly zero.
        peak = np.where(silent, 1, peak).astype(sample.dtype)
    sample /= peak  # Normalize sample
    return rate, sample
def write(path, rate, sample, dB=30.0, output_dir="", segments=None):
    """
    Write signal to disc as wave file

    The input array is never modified; scaling is done on a private
    floating-point copy, so integer input (e.g. int16) is accepted too.

    Parameters
    ----------
    path : string
        Absolute or relative path.
    rate : int
        Sample rate in Hz.
    sample : 1-d array
        Single-channel audio sample.
    dB : float (default = 30)
        Optional number of decibels to which audio will be amplified or reduced.
        The largest value in sample is scaled to ``10**(dB / 10)``.
    output_dir : string (default = "")
        Optional output directory. If provided, this directory will be created
        (if not yet existing) and the file name taken from path is placed
        inside it.
    segments : list of tuples (int, int) (default = no segmentation)
        Optional list of (start, end) index pairs. If provided only parts
        indicated by segments will be saved to disc, joined back to back in
        the given order. None or an empty list means the whole sample.

    Returns
    -------
    Nothing
    """
    sample = np.asarray(sample)
    if segments:
        # Keep only the requested slices, in the order they were given.
        sample = np.concatenate([sample[start:end] for start, end in segments])

    # Work on a float copy: in-place scaling of the argument would alter the
    # caller's data and fails outright for integer arrays.
    scaled = np.array(sample, dtype=np.float64)

    if output_dir:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        path = os.path.join(output_dir, os.path.basename(path))

    if scaled.size:
        max_sample = np.max(scaled)
        # A zero maximum (e.g. pure silence) cannot be rescaled; dividing by
        # it would only produce NaN/inf, so such data is written as-is.
        # NOTE(review): the maximum is the largest signed value, not the
        # largest magnitude - kept as in the original, confirm if intended.
        if max_sample != 0:
            scaled *= 10 ** (dB / 10.0) / max_sample

    wav.write(path, rate, scaled.astype('int16'))
class Walker(object):
    """ Walk the directory structure and find all wave files """

    def __init__(self, recordings_location):
        """
        From the given location we will traverse all nodes and
        find wave files

        Parameters
        ----------
        recordings_location : string
            Either a path to a single '.wav' file or a directory that is
            searched recursively. While searching a directory, files whose
            name ends with '_seg.wav' are skipped.

        Raises
        ------
        AssertionError
            If no wave file was found at the given location.
        """
        self._recordings = []
        if os.path.isfile(recordings_location) and recordings_location.endswith('.wav'):
            self._recordings.append(recordings_location)
        # os.walk yields nothing when the location is a plain file, so the
        # single-file case above and the directory case never overlap.
        for dirpath, _dirnames, filenames in os.walk(recordings_location):
            for filename in filenames:
                if filename.endswith('.wav') and not filename.endswith('_seg.wav'):
                    self._recordings.append(os.path.join(dirpath, filename))
        # Raised explicitly rather than through a test-framework helper; the
        # exception type stays AssertionError for callers that catch it.
        if not self._recordings:
            raise AssertionError("No recordings found!")

    def read_wave(self):
        """
        Generator for reading of wave files.

        Generators are iterators, but you can only iterate over them once.
        It's because they do not store all the values in memory,
        they generate the values on the fly. It means that all wave files will
        not be read at once, but on as-required basis.

        Parameters
        ----------
        None

        Returns
        -------
        samplerate : int
            Rate of the sample in Hz
        sample : array
            Wave file data exactly as returned by scipy.io.wavfile.read
            (not normalized; int16 for 16-bit PCM files)
        name : string
            Path of the wave file, as stored by the constructor
        """
        for name in self._recordings:
            (samplerate, sample) = wav.read(name)
            yield samplerate, sample, name

    def get_recordings_list(self):
        """ Returns list of all recordings (the walker's own list, not a copy) """
        return self._recordings

    def count(self):
        """ Total number of recordings """
        return len(self._recordings)
""" Test """
if __name__ == '__main__':
    # Manual smoke check: count the wave files found under ./Recordings.
    # Parenthesised single-argument print gives the same output as the
    # print statement.
    print(Walker("./Recordings").count())