# beatmosaic.py
# Turns a 4x4 grid of image sections into a pack of short audio samples.
import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageOps, ImageTk, ImageDraw
import numpy as np
import sounddevice as sd
import os
import wave
import queue
#================================================================================
#================================================================================
# preset
# Audio presets shared by the generators, effects and WAV writers below.
SAMPLE_RATE = 44100  # samples per second; also written as the WAV frame rate
DURATION = 0.1  # base sound length in seconds, scaled per section by its mean colour
MAX_FREQ = 2000  # pitch in Hz mapped to the top edge of the image (y = 0)
MIN_FREQ = 100  # pitch in Hz mapped to the bottom edge of the image
def save_wav(filename, audio, rate):
    """Write ``audio`` to ``filename`` as a mono, 16-bit PCM WAV file.

    Args:
        filename: Destination path of the WAV file.
        audio: Array-like of samples expressed on the int16 scale. Any
            numeric dtype is accepted.
        rate: Sample rate in Hz stored in the WAV header.
    """
    samples = np.asarray(audio)
    if samples.dtype.kind == 'f':
        # NaN/inf have no PCM representation; map them to finite values first.
        samples = np.nan_to_num(samples)
    # The header below declares 2-byte samples, so the payload must really be
    # int16. Dumping an int64/float64 buffer as-is would be read back as
    # noise. Clipping makes out-of-range values saturate instead of wrapping.
    pcm = np.clip(samples, -32768, 32767).astype('<i2')
    with wave.open(filename, 'w') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(rate)
        wf.writeframes(pcm.tobytes())
def normalize_waveform(wave):
    """Scale a waveform so that its largest absolute sample becomes 32767.

    Args:
        wave: Array-like of numeric samples.

    Returns:
        An ``int16`` array of the same shape. Empty input yields an empty
        array and an all-zero input yields silence.
    """
    # Work in float64 so that np.abs cannot overflow on int16 input (-32768).
    samples = np.asarray(wave, dtype=np.float64)
    if samples.size == 0:
        # np.max has no identity for an empty array and would raise.
        return samples.astype(np.int16)
    peak = np.max(np.abs(samples))
    if peak == 0:
        # Dividing by a zero peak would produce NaN, which casts to garbage.
        return np.zeros(samples.shape, dtype=np.int16)
    return (samples / peak * 32767).astype(np.int16)
def dynamic_range_compression(wave, threshold=-20.0, ratio=4.0):
    """Reduce the level of samples whose magnitude exceeds a threshold.

    Args:
        wave: Array-like of samples.
        threshold: Threshold in dB, converted to a linear amplitude with
            ``10 ** (threshold / 20)``.
        ratio: Compression ratio; the part of the magnitude above the
            threshold is divided by this value.

    Returns:
        An array where samples at or below the threshold are unchanged and
        louder samples are compressed, keeping their sign.
    """
    # Convert threshold from dB to linear scale
    threshold_amplitude = 10.0 ** (threshold / 20.0)
    samples = np.asarray(wave)
    # Compare magnitudes rather than raw values so that negative peaks are
    # compressed exactly like positive ones.
    magnitude = np.abs(samples)
    compressed = threshold_amplitude + (magnitude - threshold_amplitude) / ratio
    return np.where(
        magnitude > threshold_amplitude,
        np.sign(samples) * compressed,
        samples,
    )
#================================================================================
#================================================================================
# sound generation
def generate_sine_wave(freq, duration, volume):
    """Generate a peak-normalised sine tone.

    Args:
        freq: Tone frequency in Hz.
        duration: Length in seconds; the tone has
            ``int(SAMPLE_RATE * duration)`` samples.
        volume: Gain applied before normalisation. Because the result is
            peak-normalised, any non-zero volume yields a full-scale tone;
            zero yields silence.

    Returns:
        An ``int16`` array.
    """
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    signal = volume * np.sin(2 * np.pi * freq * t)
    if not np.any(signal):
        # Empty or all-zero signal: there is no peak to normalise against.
        return np.zeros(signal.shape, dtype=np.int16)
    # Normalise the float signal directly; quantising to int16 first would
    # throw away resolution at low volumes before scaling back up.
    return normalize_waveform(signal)
def generate_square_wave(freq, duration, volume):
    """Generate a peak-normalised square wave.

    The square shape is the sign of a sine of the same frequency.

    Args:
        freq: Frequency in Hz.
        duration: Length in seconds; the result has
            ``int(SAMPLE_RATE * duration)`` samples.
        volume: Gain applied before normalisation. Because the result is
            peak-normalised, any non-zero volume yields a full-scale wave;
            zero yields silence.

    Returns:
        An ``int16`` array.
    """
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    signal = volume * np.sign(np.sin(2 * np.pi * freq * t))
    if not np.any(signal):
        # Empty or all-zero signal: there is no peak to normalise against.
        return np.zeros(signal.shape, dtype=np.int16)
    # Normalise the float signal directly instead of an int16-quantised copy.
    return normalize_waveform(signal)
def generate_sawtooth_wave(freq, duration, volume):
    """Generate a peak-normalised sawtooth wave.

    Args:
        freq: Frequency in Hz.
        duration: Length in seconds; the result has
            ``int(SAMPLE_RATE * duration)`` samples.
        volume: Gain applied before normalisation. Because the result is
            peak-normalised, any non-zero volume yields a full-scale wave;
            zero yields silence.

    Returns:
        An ``int16`` array.
    """
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    phase = t * freq
    # Ramp from -1 up to (just below) 1 once per cycle.
    signal = volume * (2.0 * (phase - np.floor(phase + 0.5)))
    if not np.any(signal):
        # Empty or all-zero signal: there is no peak to normalise against.
        return np.zeros(signal.shape, dtype=np.int16)
    # Normalise the float signal directly instead of an int16-quantised copy.
    return normalize_waveform(signal)
def generate_triangle_wave(freq, duration, volume):
    """Generate a peak-normalised triangle wave.

    Args:
        freq: Frequency in Hz.
        duration: Length in seconds; the result has
            ``int(SAMPLE_RATE * duration)`` samples.
        volume: Gain applied before normalisation. Because the result is
            peak-normalised, any non-zero volume yields a full-scale wave;
            zero yields silence.

    Returns:
        An ``int16`` array.
    """
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    phase = t * freq
    # Fold a sawtooth with abs() and re-centre it to get a triangle in -1..1.
    sawtooth = 2.0 * (phase - np.floor(phase + 0.5))
    signal = volume * (2.0 * np.abs(sawtooth) - 1.0)
    if not np.any(signal):
        # Empty or all-zero signal: there is no peak to normalise against.
        return np.zeros(signal.shape, dtype=np.int16)
    # Normalise the float signal directly instead of an int16-quantised copy.
    return normalize_waveform(signal)
def generate_arpeggio(base_freq, duration, volume, direction="up"):
    """Play three sine notes one after another and normalise the result.

    The notes are ``base_freq`` and the same frequency multiplied by 1.25
    and 1.5. Each note lasts ``duration`` seconds, so the result is three
    notes long. ``direction="down"`` reverses the note order; any other
    value keeps the ascending order.
    """
    notes = []
    for pitch in (base_freq, base_freq * 1.25, base_freq * 1.5):
        notes.append(generate_sine_wave(pitch, duration, volume))
    ordered = notes[::-1] if direction == "down" else notes
    sequence = np.concatenate(ordered)
    return normalize_waveform(sequence)
def generate_chord(root_freq, duration, volume):
    """Mix three simultaneous sine notes into one normalised waveform.

    The notes are ``root_freq`` and the same frequency multiplied by 1.25
    and 1.5; they are summed sample by sample before normalisation.
    """
    voices = []
    for pitch in (root_freq, root_freq * 1.25, root_freq * 1.5):
        voices.append(generate_sine_wave(pitch, duration, volume))
    mixed = np.sum(voices, axis=0)
    return normalize_waveform(mixed)
def generate_hi_hat(duration):
    """Return ``duration`` seconds of normalised uniform random noise."""
    sample_count = int(SAMPLE_RATE * duration)
    noise = np.random.uniform(low=-1, high=1, size=sample_count)
    return normalize_waveform(noise)
def generate_kick(duration):
    """Return a 60 Hz sine tone that fades linearly from full level to zero.

    The faded tone is normalised before it is returned.
    """
    sample_count = int(SAMPLE_RATE * duration)
    fade_out = np.linspace(1, 0, sample_count)
    body = generate_sine_wave(60, duration, 1.0)
    return normalize_waveform(body * fade_out)
def generate_snare(duration):
    """Generate a snare-like sound: a short 200 Hz tone followed by noise.

    The tone occupies the first tenth of ``duration``; uniform random noise
    fills the remainder, so the total length is
    ``int(SAMPLE_RATE * duration)`` samples.

    Args:
        duration: Total length in seconds.

    Returns:
        A normalised ``int16`` array.
    """
    tone = generate_sine_wave(200, duration * 0.1, 0.5)
    # The tone comes back on the int16 scale while the noise lives in -1..1.
    # Bring the tone onto the noise's scale, otherwise normalisation shrinks
    # the noise to about one part in 32767 and it becomes inaudible.
    tone = tone.astype(np.float64) / 32767.0
    noise = np.random.uniform(-1, 1, int(SAMPLE_RATE * duration))
    # Trim the noise so that tone + noise together last `duration` seconds.
    noise = noise[:max(len(noise) - len(tone), 0)]
    wave = np.concatenate([tone, noise])
    return normalize_waveform(wave)
#================================================================================
#================================================================================
# sound fx
def apply_rhythmic_pattern_corrected(wave, pattern):
    """Gate a waveform with a pattern stretched over its whole length.

    Each pattern step covers an equal share of the samples; when the length
    is not an exact multiple of the number of steps, the shares differ by at
    most one sample.

    Args:
        wave: Array-like of samples.
        pattern: Non-empty sequence of per-step gains (for example 1 and 0).

    Returns:
        ``wave`` multiplied sample by sample with the stretched pattern.

    Raises:
        ValueError: If ``pattern`` is empty.
    """
    samples = np.asarray(wave)
    steps = np.asarray(pattern)
    if steps.size == 0:
        raise ValueError("pattern must contain at least one step")
    sample_count = len(samples)
    if sample_count == 0:
        return samples * steps[:0]
    # Map every sample to its pattern step. Unlike np.repeat with an integer
    # repeat count, this always yields exactly one gain per sample.
    step_index = np.arange(sample_count) * len(steps) // sample_count
    return samples * steps[step_index]
def apply_amplitude_modulation(wave, freq, depth):
    """Apply sinusoidal amplitude modulation (tremolo) to a waveform.

    Args:
        wave: Array-like of samples.
        freq: Modulation frequency in Hz.
        depth: Modulation depth; the gain swings between ``1 - depth`` and
            ``1 + depth``.

    Returns:
        ``wave`` multiplied by ``1 + depth * sin(2 * pi * freq * t)``.
    """
    samples = np.asarray(wave)
    # Build the time axis from the signal's own length. Deriving it from the
    # global DURATION only matched inputs of exactly that length and raised
    # a shape error for everything else.
    t = np.arange(len(samples)) / SAMPLE_RATE
    modulator = 1 + depth * np.sin(2 * np.pi * freq * t)
    return samples * modulator
def adjust_attack_release(wave, attack_time, release_time):
    """Shape a waveform with a linear fade-in and fade-out.

    Args:
        wave: Array-like of samples.
        attack_time: Fade-in length in seconds.
        release_time: Fade-out length in seconds.

    Returns:
        ``wave`` multiplied by an envelope that rises from 0 to 1, holds at
        1, then falls back to 0. If the two ramps together are longer than
        the signal they are shortened proportionally to fit.
    """
    samples = np.asarray(wave)
    total = len(samples)
    attack_samples = max(int(SAMPLE_RATE * attack_time), 0)
    release_samples = max(int(SAMPLE_RATE * release_time), 0)
    ramp_total = attack_samples + release_samples
    if ramp_total > total:
        # Without this the sustain length below would be negative and
        # np.ones would raise.
        attack_samples = attack_samples * total // ramp_total
        release_samples = total - attack_samples
    envelope = np.concatenate([
        np.linspace(0, 1, attack_samples),
        np.ones(total - attack_samples - release_samples),
        np.linspace(1, 0, release_samples),
    ])
    return samples * envelope
def apply_reverb(sound, num_reflections=5, decay_factor=0.6, delay_samples=2000):
    """Add a series of decaying echoes to a sound.

    Args:
        sound: Array-like of samples.
        num_reflections: Number of echoes to add.
        decay_factor: Gain ratio between successive echoes; echo ``i`` is
            scaled by ``decay_factor ** i``.
        delay_samples: Spacing between successive echoes, in samples.

    Returns:
        An ``int64`` array of the same length as ``sound``; echoes that
        would extend past the end are cut off. With fewer than one
        reflection a plain copy of ``sound`` is returned.

    Raises:
        ValueError: If ``delay_samples`` is less than 1.
    """
    if num_reflections < 1:
        return np.copy(sound)
    if delay_samples < 1:
        raise ValueError("delay_samples must be at least 1")
    dry = np.asarray(sound)
    length = len(dry)
    # Accumulate in float and convert once at the end, so the echoes are not
    # truncated to integers after every pass.
    mix = dry.astype(np.float64)
    for i in range(1, num_reflections + 1):
        offset = i * delay_samples
        if offset >= length:
            # This echo, and every later one, starts after the sound ends.
            break
        # Shift with zero fill. np.roll would wrap the tail of the sound
        # around to the beginning, which is not an echo.
        mix[offset:] += dry[:length - offset] * (decay_factor ** i)
    return mix.astype(np.int64)
def apply_delay(sound, delay_time=0.03, decay_factor=0.7):
    """Mix a single delayed, attenuated copy of a sound into itself.

    Args:
        sound: Array-like of samples.
        delay_time: Echo delay in seconds.
        decay_factor: Gain applied to the delayed copy.

    Returns:
        An array of the same length as ``sound``. The part of the echo that
        would fall past the end is dropped, and no echo is added when the
        delay is negative or not shorter than the sound.
    """
    dry = np.asarray(sound)
    offset = int(SAMPLE_RATE * delay_time)
    echo = np.zeros_like(dry)
    if 0 <= offset < len(dry):
        # Shift with zero fill. np.roll would wrap the end of the sound
        # around to the start instead of delaying it.
        echo[offset:] = dry[:len(dry) - offset]
    return dry + echo * decay_factor
def apply_rhythm(sound, pattern=(1, 0, 1, 0, 0), step_samples=4410):
    """Gate a sound with a repeating on/off pattern.

    Each pattern value is held for ``step_samples`` samples and the pattern
    repeats for as long as the sound lasts.

    Args:
        sound: Array-like of samples.
        pattern: Non-empty sequence of per-step gains.
        step_samples: Number of samples each pattern step lasts.

    Returns:
        ``sound`` multiplied sample by sample with the repeated pattern.

    Raises:
        ValueError: If ``pattern`` is empty or ``step_samples`` is below 1.
    """
    steps = np.asarray(pattern)
    if steps.size == 0:
        raise ValueError("pattern must contain at least one step")
    if step_samples < 1:
        raise ValueError("step_samples must be at least 1")
    samples = np.asarray(sound)
    # Look the gain up per sample instead of materialising a Python list
    # with one entry per sample.
    step_index = (np.arange(len(samples)) // step_samples) % len(steps)
    return samples * steps[step_index]
#================================================================================
#================================================================================
# main function
def play_image_as_audio_with_buttons(img, filepath):
    """Render a 4x4 grid of image sections into a folder of WAV samples.

    The image is split into 16 equal sections. Each section's colour
    statistics choose the waveform, length, rhythm gate and effects of one
    sound. The four sections of the top row are written as ``kick.wav``,
    ``snare.wav``, ``hi_hat.wav`` and ``tom.wav``; all others as
    ``section_<column>x<row>.wav``. Files go into a folder named
    ``"<image name> Sound Pack"`` in the current working directory.

    Args:
        img: PIL image to analyse; any mode that converts to RGB.
        filepath: Path the image was loaded from; only its base name is
            used, to name the output folder.

    Returns:
        A dict mapping ``(column, row)`` to the rendered waveform array.

    Raises:
        ValueError: If the image is smaller than 4x4 pixels.
    """
    section_width = img.width // 4
    section_height = img.height // 4
    if section_width == 0 or section_height == 0:
        raise ValueError("image must be at least 4x4 pixels to be split into a 4x4 grid")

    base_name = os.path.splitext(os.path.basename(filepath))[0]
    output_folder = base_name + " Sound Pack"
    os.makedirs(output_folder, exist_ok=True)

    # The per-channel indexing below needs three colour channels; grayscale
    # and palette images would otherwise fail with an IndexError.
    rgb_img = img.convert("RGB")

    # Lower bound for a sound's length in seconds. A black section would
    # otherwise ask for zero samples and crash the generators.
    min_duration = 0.01

    audio_samples = {}  # dictionary to store audio samples for each grid
    for i in range(4):  # i is the column index
        for j in range(4):  # j is the row index
            left = i * section_width
            upper = j * section_height
            section = rgb_img.crop(
                (left, upper, left + section_width, upper + section_height)
            )

            section_rgb = np.array(section)
            r = np.mean(section_rgb[:, :, 0])
            g = np.mean(section_rgb[:, :, 1])
            b = np.mean(section_rgb[:, :, 2])
            gray_pixels = np.array(ImageOps.grayscale(section))
            brightness = np.mean(gray_pixels) / 255.0
            contrast = np.std(gray_pixels) / 255.0
            hsv_pixels = np.array(section.convert('HSV'))
            saturation = np.mean(hsv_pixels[:, :, 1]) / 255.0

            # Determine waveform based on multiple factors
            # NOTE(review): contrast is std/255, which cannot exceed 0.5 for
            # 8-bit pixels, so the sawtooth branch looks unreachable.
            if brightness < 0.33 and contrast < 0.5:
                wave_func = generate_sine_wave
            elif brightness < 0.66 or saturation > 0.5:
                wave_func = generate_square_wave
            elif contrast > 0.7:
                wave_func = generate_sawtooth_wave
            else:
                wave_func = generate_triangle_wave

            # Duration based on RGB: brighter sections give longer sounds.
            duration_modifier = ((r + g + b) / 3) / 255.0
            wave_duration = max(DURATION * duration_modifier, min_duration)

            # Pitch follows the section's vertical centre (top = MAX_FREQ).
            center_y = upper + section_height // 2
            freq = np.interp(center_y, [0, img.height], [MAX_FREQ, MIN_FREQ])

            # Assign designated drum sounds and name accordingly
            if i == 0 and j == 0:
                wave = generate_kick(wave_duration)
                output_file = os.path.join(output_folder, "kick.wav")
            elif i == 1 and j == 0:
                wave = generate_snare(wave_duration)
                output_file = os.path.join(output_folder, "snare.wav")
            elif i == 2 and j == 0:
                wave = generate_hi_hat(wave_duration)
                output_file = os.path.join(output_folder, "hi_hat.wav")
            elif i == 3 and j == 0:
                # Representing the tom sound
                wave = generate_triangle_wave(freq, wave_duration, brightness)
                output_file = os.path.join(output_folder, "tom.wav")
            else:
                wave = wave_func(freq, wave_duration, brightness)
                output_file = os.path.join(output_folder, f"section_{i+1}x{j+1}.wav")

            # Rhythmic Variation
            # NOTE(review): sounds here last at most DURATION (0.1 s) while
            # apply_rhythm holds each step for 4410 samples, so only the red
            # step appears to take effect - confirm this is intended.
            rhythm_pattern = [int(r > 128), int(g > 128), int(b > 128)]
            wave = apply_rhythm(wave, rhythm_pattern)

            # Apply Effects
            if saturation < 0.33:
                wave = apply_reverb(wave)
            elif saturation < 0.66:
                wave = apply_delay(wave)
            else:
                wave = apply_reverb(wave)
                wave = apply_delay(wave)

            # The effects return int64/float64 data that can exceed the
            # 16-bit range. Saturate and convert before writing, so the
            # bytes match the 2-byte sample width of the WAV file.
            pcm = np.clip(np.nan_to_num(wave), -32768, 32767).astype(np.int16)
            save_wav(output_file, pcm, SAMPLE_RATE)
            audio_samples[(i, j)] = wave  # Store the waveform in the dictionary
    return audio_samples
#================================================================================
#================================================================================
def select_image_with_buttons():
    """Ask for an image, render its sound pack and show it as 16 buttons.

    Each button displays one section of the image and plays that section's
    sound when clicked. Nothing happens if the file dialog is cancelled; a
    file that cannot be opened or processed is reported in an error dialog.
    """
    global image  # Making it global for other functions to access
    filepath = filedialog.askopenfilename()
    if not filepath:
        return
    try:
        image = Image.open(filepath)
        audio_samples = play_image_as_audio_with_buttons(image, filepath)
    except (OSError, ValueError) as err:
        messagebox.showerror(
            "Image to Audio Sample Pack Grid",
            f"Could not process the selected file:\n{err}",
        )
        return

    # Remove the placeholders or the buttons of a previously loaded image so
    # widgets do not pile up on top of each other.
    for child in frame.winfo_children():
        child.destroy()

    for i in range(4):  # column index within the image
        for j in range(4):  # row index within the image
            box = (
                i * image.width // 4,
                j * image.height // 4,
                (i + 1) * image.width // 4,
                (j + 1) * image.height // 4,
            )
            section_photo = ImageTk.PhotoImage(image=image.crop(box))
            btn = tk.Button(
                frame,
                image=section_photo,
                command=lambda i=i, j=j: play_section_sound(i, j, audio_samples),
            )
            # Keep a reference; Tk does not, and the picture would otherwise
            # be garbage-collected.
            btn.image = section_photo
            # i counts columns and j counts rows, so the button goes to
            # (row=j, column=i); the other way round shows the image
            # transposed.
            btn.grid(row=j, column=i, sticky="nsew")
#================================================================================
#================================================================================
def create_grid_overlay(image):
    """Draw the white divider lines of a 4x4 grid onto ``image``.

    The image is modified in place and also returned.
    """
    pen = ImageDraw.Draw(image)
    width, height = image.size
    column_step = width / 4
    row_step = height / 4
    for k in (1, 2, 3):
        x = column_step * k
        y = row_step * k
        pen.line([x, 0, x, height], fill="white")  # vertical divider
        pen.line([0, y, width, y], fill="white")  # horizontal divider
    return image
#================================================================================
#================================================================================
def play_section_sound(i, j, audio_samples):
    """Play the sound stored for grid section ``(i, j)``.

    Args:
        i: First element of the section key.
        j: Second element of the section key.
        audio_samples: Dict mapping ``(i, j)`` to a waveform array on the
            int16 scale.
    """
    samples = np.nan_to_num(np.asarray(audio_samples[(i, j)]))
    # Saturate before the cast; a bare astype(int16) wraps out-of-range
    # values around, which is heard as harsh distortion.
    pcm = np.clip(samples, -32768, 32767).astype(np.int16)
    # Pass the rate the audio was generated at; otherwise playback uses
    # sounddevice's default rate, which need not match.
    sd.play(pcm, samplerate=SAMPLE_RATE)
#================================================================================
#================================================================================
# Global queue of recorded audio chunks: filled by record_callback and
# drained by save_recording.
recorded_samples = queue.Queue()
def record_callback(outdata, frames, time, status):
    """Queue a copy of each audio buffer delivered while recording.

    Passed as the ``callback`` of the ``sd.InputStream`` opened in
    ``start_recording``; sounddevice calls it from a separate thread.
    ``frames``, ``time`` and ``status`` are accepted but not used.
    """
    # Copy the buffer before queueing it.
    chunk = outdata.copy()
    recorded_samples.put(chunk)
def start_recording():
    """Open and start a mono, 16-bit input stream that feeds record_callback.

    If an earlier stream is still open it is stopped and closed first.
    """
    global recording_stream
    previous = globals().get("recording_stream")
    if previous is not None and not previous.closed:
        # Rebinding the global below would orphan a stream that is still
        # open: it could no longer be stopped and would keep filling the
        # queue.
        previous.stop()
        previous.close()
    recording_stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype=np.int16,
        callback=record_callback,
    )
    recording_stream.start()
def stop_recording():
    """Stop and close the current recording stream, if there is one.

    Does nothing when no recording has been started.
    """
    global recording_stream
    # The global only exists once start_recording has run; look it up
    # defensively instead of raising NameError.
    stream = globals().get("recording_stream")
    if stream is None:
        return
    stream.stop()
    stream.close()
    recording_stream = None
def save_recording():
    """Ask for a destination and write all queued recorded chunks to it.

    The file is written as mono, 16-bit WAV at SAMPLE_RATE. The queue is
    emptied in the process. Nothing is written if the dialog is cancelled.
    """
    target = filedialog.asksaveasfilename(
        defaultextension=".wav",
        filetypes=[("WAV files", "*.wav")],
    )
    if target:
        with wave.open(target, 'w') as out:
            out.setnchannels(1)
            out.setsampwidth(2)
            out.setframerate(SAMPLE_RATE)
            # Drain the queue in arrival order.
            while not recorded_samples.empty():
                chunk = recorded_samples.get()
                out.writeframes(chunk)
#================================================================================
#================================================================================
# GUI Setup: main window, a frame holding the 4x4 grid, and the image picker.
root = tk.Tk()
root.title("Image to Audio Sample Pack Grid")

# Container for the grid cells (placeholders first, image buttons later)
frame = tk.Frame(root)
frame.pack(pady=20)

# Fill the grid with empty bordered placeholders until an image is chosen
labels = []
for i in range(4):
    row_of_labels = []
    for j in range(4):
        placeholder = tk.Label(frame, relief="solid", borderwidth=1, width=15, height=6)
        placeholder.grid(row=i, column=j, padx=5, pady=5)
        row_of_labels.append(placeholder)
    labels.append(row_of_labels)

btn_select = tk.Button(root, text="Select Image", command=select_image_with_buttons)
btn_select.pack(pady=20)

root.mainloop()