Skip to content

Commit

Permalink
update bpm and key condition
Browse files Browse the repository at this point in the history
  • Loading branch information
ldzhangyx committed Jun 28, 2023
1 parent 7052668 commit 9eb6328
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,5 @@ cython_debug/

output/
assets/

.DS_Store
5 changes: 4 additions & 1 deletion melodytalk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ def clear_input_audio(self):
if not os.path.exists("checkpoints"):
os.mkdir("checkpoints")
parser = argparse.ArgumentParser()
parser.add_argument('--load', type=str, default="Text2Music_cuda:0, ExtractTrack_cuda:0, Text2MusicWithMelody_cuda:0")
parser.add_argument('--load', type=str, default="Text2Music_cuda:0, "
"ExtractTrack_cuda:0, "
"Text2MusicWithMelody_cuda:0,"
"SimpleTracksMixing_cuda:0")
args = parser.parse_args()
load_dict = {e.split('_')[0].strip(): e.split('_')[1].strip() for e in args.load.split(',')}
bot = ConversationBot(load_dict=load_dict)
Expand Down
11 changes: 6 additions & 5 deletions melodytalk/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# source separation
import demucs.separate

from utils import prompts, get_new_audio_name
from utils import prompts, get_new_audio_name, description_to_attributes, cut_dialogue_history


# Initialize common models
Expand All @@ -32,7 +32,7 @@ def __init__(self, device):

def inference(self, text):
music_filename = os.path.join("music", f"{str(uuid.uuid4())[:8]}.wav")
prompt = text
text = description_to_attributes(text) # convert text to attributes
wav = self.model.generate([text], progress=False)
wav = wav[0] # batch size is 1
audio_write(music_filename[:-4],
Expand All @@ -47,15 +47,16 @@ def __init__(self, device):
self.model = musicgen_model

@prompts(
name="Generate music from user input text with melody condition",
description="useful if you want to generate, style transfer or remix music from a user input text with a given melody condition."
name="Generate music from user input text with melody or track condition",
description="useful if you want to generate, style transfer or remix music from a user input text with a given melody or track condition."
"like: remix the given melody with text description, or doing style transfer as text described with the given melody."
"The input to this tool should be a comma separated string of two, "
"representing the music_filename and the text description."
)

def inference(self, inputs):
music_filename, text = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
text = description_to_attributes(text) # convert text to attributes
print(f"Generating music from text with melody condition, Input Text: {text}, Melody: {music_filename}.")
updated_music_filename = get_new_audio_name(music_filename, func_name="remix")
melody, sr = torchaudio.load(music_filename)
Expand Down Expand Up @@ -90,7 +91,7 @@ def __init__(self, device):

def inference(self, inputs):
music_filename, instrument, mode = inputs.split(",")[0].strip(), inputs.split(",")[1].strip(), inputs.split(",")[2].strip()
print(f"{mode}ing {instrument} track from {music_filename}.")
print(f"{mode} {instrument} track from {music_filename}.")

if mode == "extract":
instrument_mode = instrument
Expand Down
35 changes: 35 additions & 0 deletions melodytalk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import torch
import numpy as np
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def cut_dialogue_history(history_memory, keep_last_n_words=500):
if history_memory is None or len(history_memory) == 0:
Expand Down Expand Up @@ -49,3 +52,35 @@ def get_new_audio_name(org_audio_name, func_name="update"):
recent_prev_file_name = name_split[0]
new_file_name = f'{this_new_uuid}_{func_name}_{recent_prev_file_name}_{most_org_file_name}.wav'
return os.path.join(head, new_file_name)

def description_to_attributes(description: str) -> str:
    """Rewrite a music description so that bpm and key appear as explicit attributes.

    The description is sent to the OpenAI completion endpoint with a two-example
    few-shot prompt. The model is asked to restate the description and append
    ``bpm: <n>.`` / ``key: <Key>.`` only for attributes the text actually mentions,
    e.g. ``"Generate a love pop song. bpm: 120. key: Cmaj."``.

    :param description: free-form text description of the music to generate.
    :return: the rewritten description with surrounding whitespace removed. The
        original ``description`` is returned unchanged when it is blank (no API
        call is made) or when the model returns an empty completion.
    :raises: whatever ``openai.Completion.create`` raises; errors are not caught here.
    """
    # A blank description has nothing to extract; skip the API round-trip.
    if not description or not description.strip():
        return description

    # Terminate the question with exactly one sentence-ending mark, so that a
    # description that already ends with punctuation is not sent as "..".
    question = description.strip()
    if not question.endswith((".", "!", "?")):
        question += "."

    openai_prompt = (
        "Please catch the bpm and key attributes from the original description text. "
        "If the description text does not mention it, do not add it. "
        "Here are two examples:\n"
        "Q: Generate a love pop song in C major of 120 bpm.\n"
        "A: Generate a love pop song. bpm: 120. key: Cmaj.\n"
        "Q: Generate a love pop song in a minor.\n"
        "A: Generate a love pop song. key: Amin.\n"
        f"Q: {question}\n"
        "A:\n"
    )

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=openai_prompt,
        temperature=0,  # deterministic output: this is extraction, not creative writing
        max_tokens=100,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\n"],  # the answer is a single line
    )

    # Completions commonly start with whitespace; strip it so it does not leak
    # into the text handed to the music model.
    attributes = response.choices[0].text.strip()

    # Never replace the user's description with an empty string.
    return attributes or description

0 comments on commit 9eb6328

Please sign in to comment.