
Commit

update
ldzhangyx committed Aug 31, 2023
1 parent 18329ba commit 087b752
Showing 5 changed files with 59 additions and 87 deletions.
Binary file modified .DS_Store
63 changes: 16 additions & 47 deletions melodytalk/dependencies/transplayer/inference.py
@@ -1,17 +1,6 @@
import librosa
import librosa as lr
import resampy

def transform(filepath):
audio, sr = librosa.load(filepath)
if sr != 16000:
audio = resampy.resample(audio, sr, 16000)
cqt_representation = lr.cqt(audio, sr=sr, hop_length=256)

cqt_magnitude = np.abs(cqt_representation)


import os
import argparse
import torch
import numpy as np
from math import ceil
@@ -26,11 +15,11 @@ def pad_seq(x, base=32):
assert len_pad >= 0
return np.pad(x, ((0, len_pad), (0, 0)), 'constant'), len_pad
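
To make the contract concrete, here is a small self-contained sketch of pad_seq: it right-pads the time axis of a (time, features) array to the next multiple of base and returns the pad length so it can be trimmed after inference. The len_pad line sits in the collapsed hunk above, so its reconstruction here from the ceil import is an assumption.

import numpy as np
from math import ceil

def pad_seq(x, base=32):
    # Right-pad the time axis up to the next multiple of `base`
    len_pad = ceil(x.shape[0] / base) * base - x.shape[0]
    assert len_pad >= 0
    return np.pad(x, ((0, len_pad), (0, 0)), 'constant'), len_pad

x = np.ones((100, 84))          # 100 CQT frames, 84 bins
padded, len_pad = pad_seq(x)    # padded.shape == (128, 84), len_pad == 28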


def inference(input_file_path,
output_file_path,
org='piano', trg='piano',
cp_path=None):
org='piano', trg='guitar',
feature_len=2400,
cp_path='weights.pth'):
G = Generator(dim_neck=32,
dim_emb=4,
dim_pre=512,
@@ -39,58 +28,38 @@ def inference(input_file_path,
save_info = torch.load(cp_path)
G.load_state_dict(save_info["model"])
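# Assumed checkpoint format, inferred from the load above: a dict saved as
# torch.save({"model": G.state_dict()}, cp_path)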

# process input
audio, sr = lr.load(input_file_path)
if sr != 16000:
    audio = resampy.resample(audio, sr, 16000)
    sr = 16000  # keep sr in sync with the resampled audio
cqt_representation = lr.cqt(audio, sr=sr, hop_length=256)
cqt_magnitude = np.abs(cqt_representation)

# one-hot
ins_list = ['harp', 'trumpet', 'epiano', 'viola', 'piano', 'guitar', 'organ', 'flute']
ins_org = org
ins_trg = trg
emb_org = ins_list.index(ins_org)
emb_trg = ins_list.index(ins_trg)
# emb_org = [i == ins_org for i in ins_list]
# emb_trg = [i == ins_trg for i in ins_list]
emb_org = torch.unsqueeze(torch.tensor(emb_org), dim=0).to(device)
emb_trg = torch.unsqueeze(torch.tensor(emb_trg), dim=0).to(device)
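# emb_org / emb_trg are integer instrument ids with shape (1,); the Generator
# is assumed to embed them internally (the one-hot variant is kept commented above)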

x_org = np.log(np.load(config.feature_path).T)[:config.feature_len]
x_org = np.log(cqt_magnitude.T)[:feature_len]
# x_org = np.load(config.spectrogram_path).T
x_org, len_pad = pad_seq(x_org)
x_org = torch.from_numpy(x_org[np.newaxis, :, :]).to(device)
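# Shape contract assumed at this point: lr.cqt gives (n_bins, T); after the
# log/transpose and pad_seq, x_org is (1, T_padded, n_bins) on `device`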

with torch.no_grad():
_, x_identic_psnt, _ = G(x_org, emb_org, emb_org)
if len_pad == 0:
x_trg = x_identic_psnt[0, 0, :, :].cpu().numpy()
else:
x_trg = x_identic_psnt[0, 0, :-len_pad, :].cpu().numpy()

np.save(os.path.basename(config.feature_path)[:-4] + "_" + ins_org + "_" + ins_org + ".npy", x_trg.T)
print("result saved.")

with torch.no_grad():
_, x_identic_psnt, _ = G(x_org, emb_org, emb_trg)
if len_pad == 0:
x_trg = x_identic_psnt[0, 0, :, :].cpu().numpy()
else:
x_trg = x_identic_psnt[0, 0, :-len_pad, :].cpu().numpy()

np.save(os.path.basename(config.feature_path)[:-4] + "_" + ins_org + "_" + ins_trg + ".npy", x_trg.T)
print("result saved.")
# save output: x_trg is log-magnitude CQT with shape (time, bins), so transpose
# back and undo the log before inverting
waveform = lr.icqt(np.exp(x_trg.T), sr=sr, hop_length=256)
lr.output.write_wav(output_file_path, waveform, sr)


if __name__ == '__main__':
parser = argparse.ArgumentParser()

# Model configuration.
parser.add_argument('--lambda_cd', type=float, default=0, help='weight for hidden code loss')
# Training configuration.
parser.add_argument('--feature_path', type=str, default='../../data_syn/cropped/piano_all_00.wav_cqt.npy')
parser.add_argument('--feature_len', type=int, default=2400)
# parser.add_argument('--num_iters', type=int, default=1000000, help='number of total iterations')
# parser.add_argument('--len_crop', type=int, default=128, help='dataloader output sequence length')

# Miscellaneous.
parser.add_argument('--cp_path', type=str,
default="../../autovc_cp/weights_log_cqt_down32_neck32_onehot4_withcross")

config = parser.parse_args()
print(config)
inference(config)
inference('/home/intern-2023-02/melodytalk/melodytalk/music/2d2c_piano_2333801f_2333801f.wav', 'output.wav')
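
For reference, a minimal usage sketch of the rewritten entry point (the file paths and checkpoint name here are illustrative, not from the repository):

from melodytalk.dependencies.transplayer.inference import inference

# Convert a piano recording into a guitar rendition of the same material
inference("input_piano.wav", "output_guitar.wav",
          org="piano", trg="guitar",
          feature_len=2400, cp_path="weights.pth")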
15 changes: 9 additions & 6 deletions melodytalk/main.py
@@ -118,7 +118,6 @@ def __init__(self):
"ReArrangement": "cuda:0",
"Text2MusicWithDrum": "cuda:0",
"Text2MusicWithTitle": "cuda:0",
"AddNewTrack": "cuda:0",
"MusicInpainting": "cuda:0",
"Variation": "cuda:0",
"PitchShifting": "cuda:0",
@@ -193,6 +192,13 @@ def run_text(self, text, state):
# f"Current Memory: {self.agent.memory.buffer}")
return state, state

def redo(self, state):
    # Drop the last exchange from the UI state, recover the latest user turn
    # from the agent memory, then replay it through run_text
    state = state[:-1]
    text = self.agent.memory.chat_memory.messages[-2].content
    self.agent.memory.chat_memory.messages = self.agent.memory.chat_memory.messages[:-2]
    return self.run_text(text, state)
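
# Memory layout assumed by redo() (LangChain schema; buffer contents illustrative):
#   chat_memory.messages == [..., HumanMessage("add drums"), AIMessage("saved to x.wav")]
# messages[-2] is the latest user turn; dropping the last two messages removes
# exactly one exchange before run_text() replays it.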

def run_audio(self, file, state, txt, lang):
music_filename = os.path.join('music', str(uuid.uuid4())[0:8] + ".wav")
print("Inputs:", file, state)
@@ -248,7 +254,7 @@ def clear_input_audio(self):
txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an audio").style(
container=False)
with gr.Column(scale=0.15, min_width=0):
undo = gr.Button("Undo")
redo = gr.Button("Redo")
with gr.Column(scale=0.15, min_width=0):
clear = gr.Button("Clear")
with gr.Column(scale=0.15, min_width=0):
@@ -271,15 +277,12 @@ def clear_input_audio(self):
| Impression to music | 1 | Generate a music loop that feels like "Hey Jude"'s choral part. | ChatGPT, MusicGen |
| Stylistic rearrangement | 1 | Rearrange this music audio as jazz with a saxophone solo. | MusicGen |
| Music variation | 1 | Generate a music loop that sounds like this music. | VampNet |
| Add a track | 2 | Add a saxophone solo to this music loop. | MusicGen, CLAP |
| Remove a track | 2 | Remove the guitar from this music loop. | Demucs |
| Re-generation/inpainting | 2 | Re-generate the 3-5 s section of the music loop. | VampNet |
| Pitch shifting | 2 | Shift this music by 3 semitones. | pedalboard |
| Speed changing | 2 | Speed this music up by a factor of 1.2. | torchaudio |
| Add sound effects | 2 | Add some reverb to the guitar solo. | pedalboard, automix-tools |
| Music captioning | N/A | Describe the current music loop. | LP-MusicCaps |
| * Replace instrument (unavailable) | 2 | Replace the guitar solo with piano. | Transplayer, automix-tools |
| * Timbre adjustment (unavailable) | 2 | Make the drums sound more metallic. | ChatGPT, pedalboard, automix-tools |
"""
)

@@ -292,7 +295,7 @@ def clear_input_audio(self):
txt.submit(bot.run_text, [txt, state], [chatbot, state])
txt.submit(lambda: "", None, txt)
btn.upload(bot.run_audio, [btn, state, txt, lang], [chatbot, state])

redo.click(bot.redo, [state], [chatbot, state])
rec_submit.click(bot.run_recording, [rec_audio, state, txt, lang], [chatbot, state])
rec_clear.click(bot.clear_input_audio, None, rec_audio)

66 changes: 33 additions & 33 deletions melodytalk/modules.py
@@ -517,46 +517,46 @@ def __init__(self, device):
def inference(self, inputs):
music_filename, user_message = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
print(f"Add a single sound effect to the given music, Input Music: {music_filename}, Sound Effect Name: {user_message}.")
updated_music_filename = get_new_audio_name(music_filename, func_name="single_sound_effect")
updated_music_filename = get_new_audio_name(music_filename, func_name="singlesoundeffect")
sound_effect = add_single_sound_effect(user_message)
my_pedalboard = pedalboard.Pedalboard()
my_pedalboard.append(eval(sound_effect))
input_audio, sr = torchaudio.load(music_filename)
output_audio = my_pedalboard(input_audio.numpy(), sample_rate=sr)
audio_write(updated_music_filename[:-4],
output_audio, sr, strategy="loudness", loudness_compressor=True)
torch.from_numpy(output_audio), sr, strategy="loudness", loudness_compressor=True)
print(f"\nProcessed SingleSoundEffect, Output Music: {updated_music_filename}.")
return updated_music_filename
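
For context, a standalone sketch of the pedalboard pattern this tool builds on (the effect and its parameters are illustrative):

import numpy as np
import pedalboard

board = pedalboard.Pedalboard()
board.append(pedalboard.HighpassFilter(cutoff_frequency_hz=200))

audio = np.zeros((2, 44100), dtype=np.float32)  # one second of stereo silence
processed = board(audio, sample_rate=44100)     # same shape out: (2, 44100)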


# class TimbreTransfer(object):
# def __init__(self, device):
# print("Initializing TimbreTransfer")
# self.device = device
# self.interface = interface
#
# @prompts(
# name="Transfer the timbre of the given music to another music.",
# description="useful if you want to transfer the timbre of the given music to another music."
# "like: transfer the timbre of this music to another music."
# "The input to this tool should be a comma separated string of two, "
# "representing the music_filename and the original user message."
# )
#
# def inference(self, inputs):
# music_filename, user_message = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
# print(f"Transfer the timbre of the given music to another music, Input Music: {music_filename}, Target Music: {user_message}.")
# updated_music_filename = get_new_audio_name(music_filename, func_name="timbre_transfer")
# target_music_filename = get_new_audio_name(user_message, func_name="timbre_transfer")
# # load
# wav, sr = torchaudio.load(music_filename)
# target_wav, target_sr = torchaudio.load(user_message)
# # stretch
# wav = torchaudio.functional.time_stretch(wav, sr, target_sr/sr)[0]
# # write
# audio_write(updated_music_filename[:-4],
# wav.cpu(), sr, strategy="loudness", loudness_compressor=True)
# audio_write(target_music_filename[:-4],
# target_wav.cpu(), target_sr, strategy="loudness", loudness_compressor=True)
# print(f"\nProcessed TimbreTransfer, Output Music: {updated_music_filename}.")
# return updated_music_filename
class TimbreTransfer(object):
def __init__(self, device):
print("Initializing TimbreTransfer")
self.device = device
self.interface = interface

@prompts(
name="Transfer the timbre of the given music to another music.",
description="useful if you want to transfer the timbre of the given music to another music."
"like: transfer the timbre of this music to another music."
"The input to this tool should be a comma separated string of two, "
"representing the music_filename and the original user message."
)

def inference(self, inputs):
music_filename, user_message = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
print(f"Transfer the timbre of the given music to another music, Input Music: {music_filename}, Target Music: {user_message}.")
updated_music_filename = get_new_audio_name(music_filename, func_name="timbre_transfer")
target_music_filename = get_new_audio_name(user_message, func_name="timbre_transfer")
# load
wav, sr = torchaudio.load(music_filename)
target_wav, target_sr = torchaudio.load(user_message)
# stretch
wav = torchaudio.functional.time_stretch(wav, sr, target_sr/sr)[0]
# write
audio_write(updated_music_filename[:-4],
wav.cpu(), sr, strategy="loudness", loudness_compressor=True)
audio_write(target_music_filename[:-4],
target_wav.cpu(), target_sr, strategy="loudness", loudness_compressor=True)
print(f"\nProcessed TimbreTransfer, Output Music: {updated_music_filename}.")
return updated_music_filename
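
Since this commit also reworks transplayer/inference.py, one plausible follow-up is to route this tool through that model rather than time-stretching; the wrapper below is a hypothetical sketch (the helper name, instrument arguments, and checkpoint path are all assumptions):

from melodytalk.dependencies.transplayer.inference import inference as transplayer_infer

def transfer_timbre(music_filename: str, output_filename: str,
                    source_ins: str = "piano", target_ins: str = "guitar",
                    cp_path: str = "weights.pth") -> str:
    # Delegate the conversion to the transplayer Generator checkpoint
    transplayer_infer(music_filename, output_filename,
                      org=source_ins, trg=target_ins, cp_path=cp_path)
    return output_filename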
2 changes: 1 addition & 1 deletion melodytalk/utils.py
@@ -325,7 +325,7 @@ def add_single_sound_effect(input: str) -> str:
Let us think step by step.
Q: I want to apply a 200 Hz highpass filter to this audio.
A: pedalboard.HighpassFilter(cutoff_frequency_hz=200);
A: pedalboard.HighpassFilter(cutoff_frequency_hz=200)
Q: {input}.
A: """
