streaming is now in and works. Need to update the surrounding chat functionality though....
rmusser01 committed Jan 1, 2025
1 parent 674c7e8 commit c9286ac
Showing 4 changed files with 41 additions and 39 deletions.
5 changes: 4 additions & 1 deletion App_Function_Libraries/Chat/Chat_Functions.py
@@ -55,12 +55,15 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag
     elif api_endpoint.lower() == 'anthropic':
         # Retrieve the model from config
         loaded_config_data = load_and_log_configs()
-        model = loaded_config_data['models']['anthropic'] if loaded_config_data else None
+        if not model:
+            model = loaded_config_data['anthropic_api']['model']
         response = chat_with_anthropic(
             api_key=api_key,
             input_data=input_data,
             model=model,
             custom_prompt_arg=prompt,
+            max_retries=3,
+            retry_delay=5,
             system_prompt=system_message
         )
 
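Note on the Chat_Functions.py hunk: the Anthropic branch now respects a model passed in by the caller and only falls back to the config file when none is given, and it pins the retry behaviour for chat_with_anthropic. A minimal sketch of that fallback, assuming load_and_log_configs() returns the nested dict built in Utils.py; the helper name below is hypothetical and not part of the commit:

# Sketch: prefer an explicitly passed model, otherwise fall back to the config file.
# resolve_anthropic_model() is illustrative only; it is not repo code.
from App_Function_Libraries.Utils.Utils import load_and_log_configs

def resolve_anthropic_model(model=None):
    if model:
        return model
    loaded_config_data = load_and_log_configs()
    # Mirrors the new lookup path: loaded_config_data['anthropic_api']['model']
    return loaded_config_data['anthropic_api']['model'] if loaded_config_data else None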
48 changes: 23 additions & 25 deletions App_Function_Libraries/Gradio_UI/Chat_ui.py
@@ -85,7 +85,7 @@ def clear_chat_single():
 
 # FIXME - add additional features....
 def chat_wrapper(message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, conversation_id,
-                 save_conversation, temperature, system_prompt, max_tokens=None, top_p=None, frequency_penalty=None,
+                 save_conversation, temperature, system_prompt, streaming=False, max_tokens=None, top_p=None, frequency_penalty=None,
                  presence_penalty=None, stop_sequence=None):
     try:
         if save_conversation:
@@ -107,22 +107,21 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
             full_message = message
 
         # Generate bot response
-        bot_message = chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt,
-                           temperature, system_prompt)
-
-        logging.debug(f"Bot message being returned: {bot_message}")
+        bot_message = ""
+        for chunk in chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt,
+                          temperature, system_prompt, streaming):
+            bot_message += chunk  # Accumulate the streamed response
+            logging.debug(f"Bot message being returned: {bot_message}")
+            # Yield the incremental response and updated history
+            yield bot_message, history + [(message, bot_message)], conversation_id
 
         if save_conversation:
             # Add assistant message to the database
             save_message(conversation_id, role="assistant", content=bot_message)
 
-        # Update history
-        new_history = history + [(message, bot_message)]
-
-        return bot_message, new_history, conversation_id
     except Exception as e:
         logging.error(f"Error in chat wrapper: {str(e)}")
-        return "An error occurred.", history, conversation_id
+        yield "An error occurred.", history, conversation_id
 
 
 def search_conversations(query):
@@ -174,7 +173,7 @@ def load_conversation(conversation_id):
 
 
 def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature,
-                            system_prompt):
+                            system_prompt, streaming=False):
     if not history:
         return history, "No messages to regenerate."
 
@@ -189,20 +188,19 @@ def regenerate_last_message(history, media_content, selected_parts, api_endpoint
     if not last_user_message:
         return new_history, "No user message to regenerate the bot response."
 
-    full_message = last_user_message
-
-    bot_message = chat(
-        full_message,
-        new_history,
-        media_content,
-        selected_parts,
-        api_endpoint,
-        api_key,
-        custom_prompt,
-        temperature,
-        system_prompt
-    )
+    # Generate the new bot response
+    bot_message = ""
+    if streaming:
+        # For streaming, consume the generator and accumulate the response
+        for chunk in chat(last_user_message, new_history, media_content, selected_parts, api_endpoint, api_key,
+                          custom_prompt, temperature, system_prompt, streaming):
+            bot_message += chunk
+    else:
+        # For non-streaming, get the complete response directly
+        bot_message = next(chat(last_user_message, new_history, media_content, selected_parts, api_endpoint, api_key,
+                                custom_prompt, temperature, system_prompt, streaming))
 
+    # Update the history with the regenerated message
     new_history.append((last_user_message, bot_message))
 
     return new_history, "Last message regenerated successfully."
@@ -522,7 +520,7 @@ def on_next_page_click(current_page, total_pages):
         regenerate_button.click(
             regenerate_last_message,
             inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature,
-                    system_prompt_input],
+                    system_prompt_input, streaming],
             outputs=[chatbot, save_status]
         ).then(
             lambda history: approximate_token_count(history),
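With this change chat_wrapper and regenerate_last_message treat chat() as a generator: in streaming mode each chunk is accumulated and yielded so the Gradio Chatbot can be refreshed incrementally, and in non-streaming mode next() pulls the single complete reply. The snippet below is a self-contained sketch of that consumption pattern using Gradio's support for generator event handlers; fake_chat stands in for the real chat() call and all names are illustrative, not from this commit:

import gradio as gr

def fake_chat(message, streaming=True):
    # Stand-in for chat(); yields text chunks when streaming is enabled.
    for word in ("Echoing", " back:", " ", message):
        yield word

def respond(message, history):
    # Generator handlers are re-rendered on every yield, which is what
    # produces the incremental "typing" effect in the Chatbot.
    bot_message = ""
    for chunk in fake_chat(message, streaming=True):
        bot_message += chunk
        yield history + [(message, bot_message)]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot])

if __name__ == "__main__":
    demo.launch()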
11 changes: 6 additions & 5 deletions App_Function_Libraries/LLM_API_Calls.py
@@ -321,7 +321,7 @@ def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retri
     logging.debug(f"AnthropicAI: Type of data: {type(input_data)}")
 
     # Retrieve the model from config if not provided
-    if not model:
+    if model is None:
         try:
             anthropic_model = loaded_config_data['anthropic_api']['model']
             logging.debug(f"Anthropic: Loaded model from config: {anthropic_model}")
@@ -333,9 +333,10 @@ def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retri
         logging.debug(f"Anthropic: Using provided model: {anthropic_model}")
 
     if temp is None:
-        temperature = loaded_config_data['anthropic_api']['temperature']
-        temp = 1.0
-
+        temp = loaded_config_data['anthropic_api']['temperature']
+        temp = float(temp)
+    else:
+        temp = 0.7
     logging.debug(f"Anthropic: Using default temperature: {temp}")
 
     headers = {
@@ -467,7 +468,7 @@ def chat_with_cohere(api_key, input_data, model=None, custom_prompt_arg=None, sy
     else:
         logging.info("Cohere Chat: API key not provided as parameter")
         logging.info("Cohere Chat: Attempting to use API key from config file")
-        logging.debug(f"Cohere Chat: Cohere API Key from config: {loaded_config_data['api_keys']['cohere']}")
+        logging.debug(f"Cohere Chat: Cohere API Key from config: {loaded_config_data['cohere_api']['api_key']}")
         cohere_api_key = loaded_config_data['cohere_api']['api_key']
         if cohere_api_key:
             logging.debug(f"Cohere Chat: Cohere API Key from config: {cohere_api_key[:3]}...{cohere_api_key[-3:]}")
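The temperature block in chat_with_anthropic now reads the configured value and casts it with float(), which matters because configparser hands every option back as a string. A minimal sketch of that cast, assuming the same nested config dict; the helper below is illustrative and not part of the commit:

def resolve_temperature(temp, loaded_config_data):
    # Illustrative helper: a configured value such as "0.7" arrives as a string
    # and must be converted before being sent to the API.
    if temp is None:
        temp = loaded_config_data['anthropic_api']['temperature']
        temp = float(temp)
    return temp

# Example: resolve_temperature(None, {'anthropic_api': {'temperature': '0.7'}}) -> 0.7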
16 changes: 8 additions & 8 deletions App_Function_Libraries/Utils/Utils.py
@@ -241,10 +241,10 @@ def load_and_log_configs():
         anthropic_temperature = config.get('API', 'anthropic_temperature', fallback='0.7')
         anthropic_top_p = config.get('API', 'anthropic_top_p', fallback='0.95')
         anthropic_min_p = config.get('API', 'anthropic_min_p', fallback='0.05')
-        cohore_streaming = config.get('API', 'cohere_streaming', fallback='False')
-        cohore_temperature = config.get('API', 'cohere_temperature', fallback='0.7')
-        cohore_top_p = config.get('API', 'cohere_top_p', fallback='0.95')
-        cohore_min_p = config.get('API', 'cohere_min_p', fallback='0.05')
+        cohere_streaming = config.get('API', 'cohere_streaming', fallback='False')
+        cohere_temperature = config.get('API', 'cohere_temperature', fallback='0.7')
+        cohere_top_p = config.get('API', 'cohere_top_p', fallback='0.95')
+        cohere_min_p = config.get('API', 'cohere_min_p', fallback='0.05')
         groq_streaming = config.get('API', 'groq_streaming', fallback='False')
         groq_temperature = config.get('API', 'groq_temperature', fallback='0.7')
         groq_top_p = config.get('API', 'groq_top_p', fallback='0.95')
@@ -465,10 +465,10 @@ def load_and_log_configs():
         'cohere_api': {
             'api_key': cohere_api_key,
             'model': cohere_model,
-            'streaming': cohore_streaming,
-            'temperature': cohore_temperature,
-            'top_p': cohore_top_p,
-            'min_p': cohore_min_p
+            'streaming': cohere_streaming,
+            'temperature': cohere_temperature,
+            'top_p': cohere_top_p,
+            'min_p': cohere_min_p
         },
         'groq_api': {
             'api_key': groq_api_key,
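The Utils.py hunks are a straight rename of the misspelled cohore_* locals to cohere_*, so the values stored under 'cohere_api' match the option names actually read from config. For reference, a small sketch of the config.get fallback pattern those lines rely on; the config path here is assumed for illustration:

import configparser

config = configparser.ConfigParser()
config.read('Config_Files/config.txt')  # assumed path, adjust to the project's config location

# config.get() returns the raw string from the file, or the fallback when the option is missing.
cohere_streaming = config.get('API', 'cohere_streaming', fallback='False')
cohere_temperature = config.get('API', 'cohere_temperature', fallback='0.7')

# Everything comes back as a string, so downstream code converts explicitly.
print(type(cohere_temperature).__name__, float(cohere_temperature))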
