Skip to content

Commit

Permalink
BREAKING MERGE (config.txt change) - ALL PRIOR VERSIONS BROKEN BY THIS - Merge pull request #409 from rmusser01/dev

Browse files Browse the repository at this point in the history

ALL PRIOR VERSIONS ARE BROKEN BY THIS MERGE. THIS VERSION INTRODUCES A NEW config.txt FILE, WHICH WILL CAUSE BREAKAGE WHEN MAKING ANY LLM API CALLS WITH AN OLD CONFIG.

Docs, TTS, answer streaming, web search and perplexity clone

Updates:
More Docs (no user guides yet 👎 , but we got some feature documentation! 👍 )
Foundation for TTS, setup a basic pipeline and have the ability/plan to add more TTS APIs/Engines
Have support for streaming of answers in chat in all except the 4-way chat and the One Prompt-Multiple APIs chat.
Added foundations + actual feature of Web Search; can now do web search + sub-searches to answer a query/do research on a topic. The UI is an (even more temporary) placeholder for now — not too happy with it. But the underlying pipeline is pretty nice: it can take a single query, split it into sub-queries through your choice of search provider (Bing, DDG, Google, Brave, SearX, Kagi), and then have the results evaluated for relevancy, collected via page scraping, and summarized for final analysis. Lots of areas for improvement.
  • Loading branch information
rmusser01 authored Jan 4, 2025
2 parents 8712523 + 5e2c2d5 commit b579bad
Show file tree
Hide file tree
Showing 91 changed files with 9,391 additions and 2,022 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ jobs:
- name: Test ChromaDB lib functions with pytest
run: |
pwd
cd ./Tests/ChromaDB
pytest test_chromadb.py
ls
ls ./Config_Files
cd ./Tests/ChromaDB/
pytest ./test_chromadb.py
- name: Test RAG lib functions with pytest
run: |
Expand All @@ -59,3 +61,8 @@ jobs:
pwd
cd ./Tests/Utils
pytest test_utils.py
- name: Test tldw runs
run: |
pwd
python summarize.py -h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
pipeline:
params:
clustering: AgglomerativeClustering
embedding: /FULL/PATH/TO/SCRIPT/tldw/App_Function_Libraries/models/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin #models/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin
segmentation: /FULL/PATH/TO/SCRIPT/tldw/App_Function_Libraries/models/pyannote_model_segmentation-3.0.bin #models/pyannote_model_segmentation-3.0.bin

params:
segmentation:
min_duration_off: 0.0
clustering:
method: centroid
min_cluster_size: 12
threshold: 0.7045654963945799
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,20 @@ def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str,
Overall Assessment: [Your overall assessment of the summary's quality]
"""

# FIXME - Add g_eval_model to config.txt
# g_eval_model = loaded_config[][]
try:
result = chat_api_call(
api_name,
api_key,
prompt,
"",
temp=temp,
system_message="You are a helpful AI assistant tasked with evaluating summaries."
system_message="You are a helpful AI assistant tasked with evaluating summaries.",
streaming=False,
minp=None,
maxp=None,
model=None
)
except Exception as e:
return detailed_api_error(api_name, e)
Expand Down
2 changes: 1 addition & 1 deletion App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def geval_summarization(
temp = 0.7
logging.info(f"Debug - geval_summarization Function - API Endpoint: {api_endpoint}")
try:
response = chat_api_call(api_endpoint, api_key, prompt_with_src_and_gen, "", temp, system_message)
response = chat_api_call(api_endpoint, api_key, prompt_with_src_and_gen, "", temp, system_message, streaming=False, minp=None, maxp=None, model=None)
except Exception as e:
raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
except RetryError:
Expand Down
14 changes: 13 additions & 1 deletion App_Function_Libraries/Character_Chat/Character_Chat_Lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,25 @@ def extract_character_id(choice: str) -> int:
"""Extract the character ID from the dropdown selection string."""
log_counter("extract_character_id_attempt")
try:
character_id = int(choice.split('(ID: ')[1].rstrip(')'))
logging.debug(f"Choice received: {choice}") # Debugging line
if not choice:
raise ValueError("No choice provided.")

if '(ID: ' not in choice or ')' not in choice:
raise ValueError(f"Invalid choice format: {choice}")

# Extract the ID part
id_part = choice.split('(ID: ')[1]
character_id = int(id_part.rstrip(')'))

logging.debug(f"Extracted character ID: {character_id}") # Debugging line
log_counter("extract_character_id_success")
return character_id
except Exception as e:
log_counter("extract_character_id_error", labels={"error": str(e)})
raise


def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
"""Wrapper function to load character and image using the extracted ID."""
log_counter("load_character_wrapper_attempt")
Expand Down
54 changes: 33 additions & 21 deletions App_Function_Libraries/Chat/Chat_Functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,23 @@
# Functions:

def approximate_token_count(history):
total_text = ''
for user_msg, bot_msg in history:
if user_msg:
total_text += user_msg + ' '
if bot_msg:
total_text += bot_msg + ' '
total_tokens = len(total_text.split())
return total_tokens

def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message=None):
try:
total_text = ''
for user_msg, bot_msg in history:
if user_msg:
total_text += user_msg + ' '
if bot_msg:
total_text += bot_msg + ' '
total_tokens = len(total_text.split())
return total_tokens
except Exception as e:
logging.error(f"Error calculating token count: {str(e)}")
return 0


# FIXME - add model parameter
def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message, streaming, minp=None, maxp=None, model=None):
logging.info(f"Debug - Chat API Call - API Endpoint: {api_endpoint}")
log_counter("chat_api_call_attempt", labels={"api_endpoint": api_endpoint})
start_time = time.time()
if not api_key:
Expand All @@ -50,18 +57,22 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag
logging.info(f"Debug - Chat API Call - API Key: {api_key}")
logging.info(f"Debug - Chat chat_api_call - API Endpoint: {api_endpoint}")
if api_endpoint.lower() == 'openai':
response = chat_with_openai(api_key, input_data, prompt, temp, system_message)
response = chat_with_openai(api_key, input_data, prompt, temp, system_message, streaming, minp, maxp, model)

elif api_endpoint.lower() == 'anthropic':
# Retrieve the model from config
loaded_config_data = load_and_log_configs()
model = loaded_config_data['models']['anthropic'] if loaded_config_data else None
if not model:
model = loaded_config_data['anthropic_api']['model']
response = chat_with_anthropic(
api_key=api_key,
input_data=input_data,
model=model,
custom_prompt_arg=prompt,
system_prompt=system_message
max_retries=3,
retry_delay=5,
system_prompt=system_message,
streaming=streaming,
)

elif api_endpoint.lower() == "cohere":
Expand Down Expand Up @@ -133,7 +144,7 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag


def chat(message, history, media_content, selected_parts, api_endpoint, api_key, prompt, temperature,
system_message=None):
system_message=None, streaming=False, minp=None, maxp=None, model=None):
log_counter("chat_attempt", labels={"api_endpoint": api_endpoint})
start_time = time.time()
try:
Expand Down Expand Up @@ -172,12 +183,15 @@ def chat(message, history, media_content, selected_parts, api_endpoint, api_key,
logging.debug(f"Debug - Chat Function - Prompt: {prompt}")

# Use the existing API request code based on the selected endpoint
response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message)
response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message, streaming, minp=None, maxp=None, model=None)

chat_duration = time.time() - start_time
log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint})
log_counter("chat_success", labels={"api_endpoint": api_endpoint})
return response
if streaming:
return response
else:
chat_duration = time.time() - start_time
log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint})
log_counter("chat_success", labels={"api_endpoint": api_endpoint})
return response
except Exception as e:
log_counter("chat_error", labels={"api_endpoint": api_endpoint, "error": str(e)})
logging.error(f"Error in chat function: {str(e)}")
Expand Down Expand Up @@ -374,7 +388,6 @@ def update_chat_content(selected_item, use_content, use_summary, use_prompt, ite

CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json')


def save_character(character_data):
log_counter("save_character_attempt")
start_time = time.time()
Expand Down Expand Up @@ -435,7 +448,6 @@ def load_characters():
return {}



def get_character_names():
log_counter("get_character_names_attempt")
start_time = time.time()
Expand Down
19 changes: 19 additions & 0 deletions App_Function_Libraries/Chat/Chat_Pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Chat_Pipeline.py
#
# Description: This file contains functions related to the prompt modification pipeline, available as a means of
# complex prompt modification/replacement without modification of original intent/messaging.
#
# Imports
import os
#
# 3rd-party Libraries
#
# Local Imports
#
#######################################################################################################################
#
# Functions:

#
# End of Chat_Pipeline.py
#######################################################################################################################
25 changes: 14 additions & 11 deletions App_Function_Libraries/DB/Character_Chat_DB.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,17 +355,20 @@ def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:

def get_character_cards() -> List[Dict]:
"""Retrieve all character cards from the database."""
logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
conn = sqlite3.connect(chat_DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM CharacterCards")
rows = cursor.fetchall()
columns = [description[0] for description in cursor.description]
conn.close()
characters = [dict(zip(columns, row)) for row in rows]
#logging.debug(f"Characters fetched from DB: {characters}")
return characters

try:
logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
conn = sqlite3.connect(chat_DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM CharacterCards")
rows = cursor.fetchall()
columns = [description[0] for description in cursor.description]
conn.close()
characters = [dict(zip(columns, row)) for row in rows]
logging.debug(f"Characters fetched from DB: {characters}")
return characters
except Exception as e:
logging.error(f"Error fetching character cards: {e}")
return []

def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""
Expand Down
7 changes: 7 additions & 0 deletions App_Function_Libraries/Gradio_Related.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
create_utilities_yt_video_tab
from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab
from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab
from App_Function_Libraries.Gradio_UI.WebSearch_tab import create_websearch_tab
from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab
from App_Function_Libraries.Gradio_UI.Workflows_tab import chat_workflows_tab
from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_view_all_mediadb_with_versions_tab, \
Expand Down Expand Up @@ -403,6 +404,10 @@ def launch_ui(share_public=None, server_mode=False):
padding: 10px;
margin-top: 10px;
}
.scrollable-textbox textarea {
height: 600px !important;
overflow-y: auto !important;
}
"""

config = load_and_log_configs()
Expand Down Expand Up @@ -464,6 +469,8 @@ def launch_ui(share_public=None, server_mode=False):
create_chat_interface_four()
chat_workflows_tab()

with gr.TabItem("Web Search & Review", id="websearch group", visible=True):
create_websearch_tab()
with gr.TabItem("Character Chat", id="character chat group", visible=True):
create_character_card_interaction_tab()
create_character_chat_mgmt_tab()
Expand Down
Loading

0 comments on commit b579bad

Please sign in to comment.