From 92b877a84c1749d224ef0b74ebf1963e0d2a884f Mon Sep 17 00:00:00 2001
From: Yixiao Zhang
Date: Thu, 22 Jun 2023 18:35:12 +0900
Subject: [PATCH] change audio display: support embedded audio

---
 melodytalk/main.py | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/melodytalk/main.py b/melodytalk/main.py
index bd2abd7..cdb4486 100644
--- a/melodytalk/main.py
+++ b/melodytalk/main.py
@@ -258,14 +258,12 @@ def run_text(self, text, state):
         res = self.agent({"input": text.strip()})
         res['output'] = res['output'].replace("\\", "/")
         state = state + [(text, res['output'])]
-        print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
-              f"Current Memory: {self.agent.memory.buffer}")
         if len(res['intermediate_steps']) > 0:
             audio_filename = res['intermediate_steps'][0][1]
-            # audio_filename = re.sub('(music/[-\w]*.wav)', lambda m: f'![](file={m.group(0)})*{m.group(0)}*', res['output'])
-            return state, state, gr.Audio.update(value=audio_filename, visible=True)
-        else:
-            return state, state, gr.Audio.update(visible=False)
+            state = state + [(None,(audio_filename,))]
+        print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
+              f"Current Memory: {self.agent.memory.buffer}")
+        return state, state

     def run_audio(self, file, state, txt, lang):
         music_filename = os.path.join('music', str(uuid.uuid4())[0:8] + ".wav")
@@ -275,25 +273,18 @@ def run_audio(self, file, state, txt, lang):
         # description = self.models['ImageCaptioning'].inference(image_filename)
         if lang == 'Chinese':
             Human_prompt = f'提供一个名为 {music_filename}的音乐。' \
-                           f'这些信息帮助你理解这个音乐,但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\". \n'
+                           f'这些信息帮助你理解这个音乐,但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\".'
             AI_prompt = "收到。 "
         else:
             Human_prompt = f'Provide a music named {music_filename}. ' \
-                           f'This information helps you to understand this music, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
+                           f'This information helps you to understand this music, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\".'
             AI_prompt = "Received. "
         self.agent.memory.chat_memory.add_user_message(Human_prompt)
         self.agent.memory.chat_memory.add_ai_message(AI_prompt)
-        state = state + [(f"![](file={music_filename})*{music_filename}*", AI_prompt)]
+        state = state + [((music_filename,), AI_prompt)]
         print(f"\nProcessed run_audio, Input music: {music_filename}\nCurrent state: {state}\n"
               f"Current Memory: {self.agent.memory.buffer}")
-        return state, state, f'{txt} {music_filename} '
-
-
-    def clear_audio(self):
-        return gr.Audio.update(value=None, visible=False)
-
-    def clear_input_audio(self):
-        return gr.Audio.update(value=None)
+        return state, state

 if __name__ == '__main__':
     if not os.path.exists("checkpoints"):
@@ -318,15 +309,11 @@ def clear_input_audio(self):
         # with gr.Column(scale=0.15, min_width=0):
         #     rec = gr.A("Record",source="microphone",file_types=["audio"])

-        with gr.Row():
-            outaudio = gr.Audio(visible=False)
-
         lang.change(bot.init_agent, [lang], [input_raws, lang, txt, clear])
-        txt.submit(bot.run_text, [txt, state], [chatbot, state, outaudio])
+        txt.submit(bot.run_text, [txt, state], [chatbot, state])
         txt.submit(lambda: "", None, txt)
-        btn.upload(bot.run_audio, [btn, state, txt, lang], [chatbot, state, txt])
+        btn.upload(bot.run_audio, [btn, state, txt, lang], [chatbot, state])
         clear.click(bot.memory.clear)
         clear.click(lambda: [], None, chatbot)
         clear.click(lambda: [], None, state)
-        clear.click(bot.clear_audio, None, outaudio)
         demo.launch(server_name="0.0.0.0", server_port=7860)
\ No newline at end of file