update bpm and key condition

ldzhangyx · Jun 28, 2023 · 9eb6328 · 9eb6328
1 parent 7052668
commit 9eb6328
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 6 deletions.
diff --git a/.gitignore b/.gitignore
@@ -153,3 +153,5 @@ cython_debug/
 
 output/
 assets/
+
+.DS_Store
diff --git a/melodytalk/main.py b/melodytalk/main.py
@@ -226,7 +226,10 @@ def clear_input_audio(self):
     if not os.path.exists("checkpoints"):
         os.mkdir("checkpoints")
     parser = argparse.ArgumentParser()
-    parser.add_argument('--load', type=str, default="Text2Music_cuda:0, ExtractTrack_cuda:0, Text2MusicWithMelody_cuda:0")
+    parser.add_argument('--load', type=str, default="Text2Music_cuda:0, "
+                                                    "ExtractTrack_cuda:0, "
+                                                    "Text2MusicWithMelody_cuda:0,"
+                                                    "SimpleTracksMixing_cuda:0")
     args = parser.parse_args()
     load_dict = {e.split('_')[0].strip(): e.split('_')[1].strip() for e in args.load.split(',')}
     bot = ConversationBot(load_dict=load_dict)

diff --git a/melodytalk/modules.py b/melodytalk/modules.py
@@ -10,7 +10,7 @@
 # source separation
 import demucs.separate
 
-from utils import prompts, get_new_audio_name
+from utils import prompts, get_new_audio_name, description_to_attributes, cut_dialogue_history
 
 
 # Initialize common models
@@ -32,7 +32,7 @@ def __init__(self, device):
 
     def inference(self, text):
         music_filename = os.path.join("music", f"{str(uuid.uuid4())[:8]}.wav")
-        prompt = text
+        text = description_to_attributes(text)  # convert text to attributes
         wav = self.model.generate([text], progress=False)
         wav = wav[0]  # batch size is 1
         audio_write(music_filename[:-4],
@@ -47,15 +47,16 @@ def __init__(self, device):
         self.model = musicgen_model
 
     @prompts(
-        name="Generate music from user input text with melody condition",
-        description="useful if you want to generate, style transfer or remix music from a user input text with a given melody condition."
+        name="Generate music from user input text with melody or track condition",
+        description="useful if you want to generate, style transfer or remix music from a user input text with a given melody or track condition."
                     "like: remix the given melody with text description, or doing style transfer as text described with the given melody."
                     "The input to this tool should be a comma separated string of two, "
                     "representing the music_filename and the text description."
     )
 
     def inference(self, inputs):
         music_filename, text = inputs.split(",")[0].strip(), inputs.split(",")[1].strip()
+        text = description_to_attributes(text)  # convert text to attributes
         print(f"Generating music from text with melody condition, Input Text: {text}, Melody: {music_filename}.")
         updated_music_filename = get_new_audio_name(music_filename, func_name="remix")
         melody, sr = torchaudio.load(music_filename)
@@ -90,7 +91,7 @@ def __init__(self, device):
 
     def inference(self, inputs):
         music_filename, instrument, mode = inputs.split(",")[0].strip(), inputs.split(",")[1].strip(), inputs.split(",")[2].strip()
-        print(f"{mode}ing {instrument} track from {music_filename}.")
+        print(f"{mode} {instrument} track from {music_filename}.")
 
         if mode == "extract":
             instrument_mode = instrument

diff --git a/melodytalk/utils.py b/melodytalk/utils.py
@@ -3,6 +3,9 @@
 import torch
 import numpy as np
 import os
+import openai
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
 
 def cut_dialogue_history(history_memory, keep_last_n_words=500):
     if history_memory is None or len(history_memory) == 0:
@@ -49,3 +52,35 @@ def get_new_audio_name(org_audio_name, func_name="update"):
     recent_prev_file_name = name_split[0]
     new_file_name = f'{this_new_uuid}_{func_name}_{recent_prev_file_name}_{most_org_file_name}.wav'
     return os.path.join(head, new_file_name)
+
+def description_to_attributes(description: str) -> str:
+    """ Ask an OpenAI completion model to rewrite the description with any bpm and key it mentions spelled out as attributes (the prompt below covers only bpm and key, not genre, mood or instrument).
+
+    :param description: free-form text; it is inserted into the few-shot prompt as the final question.
+    :return: the text of the first completion choice, returned as-is (no stripping or validation).
+    """
+
+    openai_prompt = f"""Please catch the bpm and key attributes from the original description text. If the description text does not mention it, do not add it. Here are two examples:
+
+    Q: Generate a love pop song in C major of 120 bpm.
+    A: Generate a love pop song. bpm: 120. key: Cmaj. 
+    
+    Q: Generate a love pop song in a minor.
+    A:  Generate a love pop song. key: Amin. 
+    
+    Q: {description}.
+    A: 
+    """
+
+    response = openai.Completion.create(
+      model="text-davinci-003",
+      prompt=openai_prompt,
+      temperature=0,  # presumably chosen so the extraction is repeatable -- confirm
+      max_tokens=100,
+      top_p=1,
+      frequency_penalty=0.0,
+      presence_penalty=0.0,
+      stop=["\n"]  # generation ends at the first newline the model emits
+    )
+
+    return response.choices[0].text
Original file line number	Diff line number	Diff line change
Expand Up		@@ -153,3 +153,5 @@ cython_debug/

		output/
		assets/

		.DS_Store