From b6bac895bbf2246fa8abd1f60a42dc5c349fec37 Mon Sep 17 00:00:00 2001 From: Yixiao Zhang Date: Fri, 14 Jul 2023 17:16:50 +0900 Subject: [PATCH] update system args --- melodytalk/main.py | 18 +++++++++--------- melodytalk/modules.py | 42 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/melodytalk/main.py b/melodytalk/main.py index 41de164..03fbc8d 100644 --- a/melodytalk/main.py +++ b/melodytalk/main.py @@ -127,8 +127,8 @@ class ConversationBot(object): def __init__(self): - load_dict = {"Text2Music":"cuda:0", "ExtractTrack":"cuda:0", "Text2MusicWithMelody":"cuda:0", "SimpleTracksMixing":"cuda:0"} - template_dict = { "Text2MusicwithChord": "cuda:0"} # "Accompaniment": "cuda:0", + load_dict = {"Text2Music":"cuda:0", "ExtractTrack":"cuda:0", "Text2MusicWithMelody":"cuda:0", "Text2MusicWithDrum":"cuda:0"} + template_dict = None #{ "Text2MusicwithChord": "cuda:0"} # "Accompaniment": "cuda:0", print(f"Initializing MelodyTalk, load_dict={load_dict}, template_dict={template_dict}") @@ -138,13 +138,13 @@ def __init__(self): self.models[class_name] = globals()[class_name](device=device) # Load Template Foundation Models - for class_name, device in template_dict.items(): - template_required_names = {k for k in inspect.signature(globals()[class_name].__init__).parameters.keys() if - k != 'self'} - loaded_names = set([type(e).__name__ for e in self.models.values()]) - if template_required_names.issubset(loaded_names): - self.models[class_name] = globals()[class_name]( - **{name: self.models[name] for name in template_required_names}) + # for class_name, device in template_dict.items(): + # template_required_names = {k for k in inspect.signature(globals()[class_name].__init__).parameters.keys() if + # k != 'self'} + # loaded_names = set([type(e).__name__ for e in self.models.values()]) + # if template_required_names.issubset(loaded_names): + # self.models[class_name] = globals()[class_name]( + # **{name: 
self.models[name] for name in template_required_names}) print(f"All the Available Functions: {self.models}") diff --git a/melodytalk/modules.py b/melodytalk/modules.py index 97e07dd..9a1786e 100644 --- a/melodytalk/modules.py +++ b/melodytalk/modules.py @@ -16,7 +16,7 @@ from utils import * -DURATION = 15 +DURATION = 6 GENERATION_CANDIDATE = 5 # Initialze common models @@ -107,9 +107,10 @@ def __init__(self, device): self.model = musicgen_model_melody @prompts( - name="Generate music from user input text with given drum pattern", - description="useful if you want to generate music from a user input text with a given drum pattern." - "like: generate of pop song, and following the drum pattern above." + name="Generate music from user input text based on the drum audio file provided.", + description="useful if you want to generate music from a user input text and a previous given drum pattern." + "Do not use it when there is no previous music file (generated or uploaded) in the history." + "like: generate a pop song based on the provided drum pattern above." "The input to this tool should be a comma separated string of two, " "representing the music_filename and the text description." ) @@ -136,7 +137,40 @@ def inference(self, inputs): return updated_music_filename +class AddNewTrack(object): + def __init__(self, device): + print("Initializing AddNewTrack") + self.device = device + self.model = musicgen_model_melody + + @prompts( + name="Add a new track to the given music loop", + description="useful if you want to add a new track (usually add a new instrument) to the given music." + "like: add a saxophone to the given music, or add piano arrangement to the given music." + "The input to this tool should be a comma separated string of two, " + "representing the music_filename and the text description." 
+ ) + def inference(self, inputs): + music_filename, text = inputs.split(",")[0].strip(), inputs.split(",")[1].strip() + text = description_to_attributes(text) + print(f"Adding a new track, Input Text: {text}, Music: {music_filename}.") + updated_music_filename = get_new_audio_name(music_filename, func_name="add_track") + drum, sr = torchaudio.load(music_filename) + self.model.set_generation_params(duration=30) + wav = self.model.generate_continuation(prompt=drum[None].expand(GENERATION_CANDIDATE, -1, -1), prompt_sr=sr, + descriptions=[text] * GENERATION_CANDIDATE, progress=False) + self.model.set_generation_params(duration=DURATION) + # cut drum prompt + wav = wav[:, drum.shape[1]:drum.shape[1] + DURATION * sr] + # TODO: split tracks by beats + # select the best one by CLAP scores + best_wav, _ = CLAP_post_filter(CLAP_model, text, wav, self.model.sample_rate) + audio_write(updated_music_filename[:-4], + best_wav.cpu(), self.model.sample_rate, strategy="loudness", loudness_compressor=True) + print(f"\nProcessed AddNewTrack, Output Music: {updated_music_filename}.") + return updated_music_filename class ExtractTrack(object):