
Commit

update
ldzhangyx committed Jul 26, 2023
1 parent df07290 commit 7641698
Showing 3 changed files with 41 additions and 5 deletions.
8 changes: 8 additions & 0 deletions melodytalk/main.py
Expand Up @@ -221,10 +221,18 @@ def clear_input_audio(self):
if not os.path.exists("checkpoints"):
os.mkdir("checkpoints")
bot = ConversationBot()

with gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}") as demo:

gr.Markdown(
"""This is a demo to our work *MelodyTalk*.
"""
)

lang = gr.Radio(choices=['Chinese', 'English'], value=None, label='Language')
chatbot = gr.Chatbot(elem_id="chatbot", label="MelodyTalk")
state = gr.State([])

with gr.Row(visible=False) as input_raws:
with gr.Column(scale=0.7):
txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an audio").style(
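The new `lang` radio is added here without its event handler, which falls outside the hunk shown. A rough sketch of how such a control is typically wired in Gradio 3.x — the handler `select_language` and the localized placeholder are assumptions, not code from this commit:

```python
import gradio as gr

def select_language(choice):
    # Hypothetical handler: reveal the hidden input row once a language is
    # chosen, and localize the textbox placeholder. The real callback in
    # main.py sits outside the diff context shown above.
    placeholder = ("Enter text and press enter, or upload an audio"
                   if choice == "English"
                   else "输入文字并回车，或上传音频")
    return gr.update(visible=True), gr.update(placeholder=placeholder)

with gr.Blocks() as sketch:
    lang = gr.Radio(choices=["Chinese", "English"], value=None, label="Language")
    with gr.Row(visible=False) as input_raws:
        txt = gr.Textbox(show_label=False)
    lang.change(select_language, inputs=[lang], outputs=[input_raws, txt])
```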
10 changes: 6 additions & 4 deletions melodytalk/modules.py
Expand Up @@ -13,7 +13,7 @@

from utils import *

DURATION = 6
DURATION = 8
GENERATION_CANDIDATE = 5

# Initialze common models
@@ -66,11 +66,10 @@ def description_to_attributes_wrapper(self, description: str) -> str:
 
 
 # attribute management
 global attribute_table
 attribute_table = GlobalAttributes()
 
 
 class Text2Music(object):
     def __init__(self, device):
         print("Initializing Text2Music")
Expand All @@ -86,6 +85,7 @@ def __init__(self, device):

def inference(self, text):
music_filename = os.path.join("music", f"{str(uuid.uuid4())[:8]}.wav")
attribute_table.descriptions = text
text = description_to_attributes(text) # convert text to attributes
wav = self.model.generate([text], progress=False)
wav = wav[0] # batch size is 1
Expand All @@ -112,6 +112,7 @@ def __init__(self, device):

def inference(self, inputs):
music_filename, text = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
attribute_table.descriptions = text
text = description_to_attributes(text) # convert text to attributes
print(f"Generating music from text with melody condition, Input Text: {text}, Melody: {music_filename}.")
updated_music_filename = get_new_audio_name(music_filename, func_name="remix")
@@ -174,6 +175,7 @@ def __init__(self, device):
 
     def inference(self, inputs):
         music_filename, text = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
+        attribute_table.descriptions = merge_description(attribute_table.descriptions, text)
         text = addtrack_demand_to_description(text)
         print(f"Adding a new track, Input text: {text}, Previous track: {music_filename}.")
         updated_music_filename = get_new_audio_name(music_filename, func_name="addtrack")
Expand All @@ -187,7 +189,7 @@ def inference(self, inputs):
splitted_audios = split_audio_tensor_by_downbeats(wav.cpu(), self.model.sample_rate, True)
# select the best one by CLAP scores
print(f"CLAP post filter for {len(splitted_audios)} candidates.")
best_wav, _ = CLAP_post_filter(CLAP_model, text, splitted_audios.cuda(), self.model.sample_rate)
best_wav, _ = CLAP_post_filter(CLAP_model, attribute_table.descriptions, splitted_audios.cuda(), self.model.sample_rate)
audio_write(updated_music_filename[:-4],
best_wav.cpu(), self.model.sample_rate, strategy="loudness", loudness_compressor=True)
print(f"\nProcessed AddNewTrack, Output Music: {updated_music_filename}.")
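These hunks route every user request through the module-level `attribute_table`, so that later tools such as `AddNewTrack` can recover the full accumulated description rather than only the latest instruction. `GlobalAttributes` itself is defined outside the shown context; a minimal sketch of what such a container might look like — its real definition and field list are not part of this commit:

```python
from dataclasses import dataclass

@dataclass
class GlobalAttributes:
    # Only `descriptions` is referenced by this commit; any further fields
    # would be speculative, so none are added here.
    descriptions: str = ""

# Module-level singleton, mirroring `attribute_table = GlobalAttributes()`:
attribute_table = GlobalAttributes()
attribute_table.descriptions = "rock music with drum and guitar"
```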
28 changes: 27 additions & 1 deletion melodytalk/utils.py
Expand Up @@ -122,6 +122,32 @@ def addtrack_demand_to_description(description: str) -> str:

return response.choices[0].text

def merge_description(description_1: str, description_2: str) -> str:
openai_prompt = f"""Please merge two descriptions into one.
S1: Please generate a rock music with drum and guitar for me.
S2: Please add a saxophone track to this music.
A: rock music with drum, guitar and saxophone.
S1: Please generate a love pop song with piano and violin for me.
S2: Please remove the piano.
A: love pop song with violin.
S1: {description_1}.
S2: {description_2}.
A: """

response = openai.Completion.create(
model="text-davinci-003",
prompt=openai_prompt,
temperature=0,
max_tokens=100,
top_p=1,
frequency_penalty=0.0,
presence_penalty=0.0,
)

return response.choices[0].text
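A usage sketch for the new helper, assuming `openai.api_key` is already configured and the legacy (pre-1.0) `openai.Completion` API that the function itself relies on; completion text tends to arrive with leading whitespace from the few-shot prompt, so a caller may want to strip it:

```python
merged = merge_description(
    "rock music with drum and guitar",   # what attribute_table currently holds
    "please add a saxophone track",      # the new user request
).strip()
# Expected shape of the result (illustrative, not a recorded output):
# "rock music with drum, guitar and saxophone"
```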

 def chord_generation(description: str) -> tp.List:
     """ This function is a trick to generate chord sequence from the description.
@@ -216,7 +242,7 @@ def CLAP_post_filter(clap_model,
     # get the index of the most similar audio
     index = torch.argmax(similarity)
     best = audio_candidates[index].view(1, -1)
-    best = resampy.resample(best.cpu().numpy(), 48000, audio_sr, axis=-1)
+    best = torch.from_numpy(resampy.resample(best.cpu().numpy(), 48000, audio_sr, axis=-1))
     # return
     return best, similarity[index]

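The one-line change above fixes a type mismatch: `resampy.resample` returns a NumPy array, while the caller in modules.py immediately invokes `best_wav.cpu()`, which only exists on tensors. Wrapping the result in `torch.from_numpy` restores the tensor-in, tensor-out contract. A standalone illustration of the round-trip (the sample rates here are arbitrary):

```python
import resampy
import torch

wav = torch.randn(1, 48000)                             # 1 s of audio at 48 kHz
resampled = resampy.resample(wav.cpu().numpy(), 48000, 32000, axis=-1)
print(type(resampled))                                  # <class 'numpy.ndarray'>
best = torch.from_numpy(resampled)                      # back to a torch.Tensor
print(best.shape)                                       # torch.Size([1, 32000])
```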

