diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 2ec85b147..e373d2698 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -33,8 +33,10 @@ jobs:
     - name: Test ChromaDB lib functions with pytest
       run: |
         pwd
-        cd ./Tests/ChromaDB
-        pytest test_chromadb.py
+        ls
+        ls ./Config_Files
+        cd ./Tests/ChromaDB/
+        pytest ./test_chromadb.py
 
     - name: Test RAG lib functions with pytest
       run: |
@@ -59,3 +61,8 @@ jobs:
         pwd
         cd ./Tests/Utils
         pytest test_utils.py
+
+    - name: Test tldw runs
+      run: |
+        pwd
+        python summarize.py -h
diff --git a/App_Function_Libraries/Audio/models/pyannote_diarization_config.yaml b/App_Function_Libraries/Audio/models/pyannote_diarization_config.yaml
new file mode 100644
index 000000000..675f70f26
--- /dev/null
+++ b/App_Function_Libraries/Audio/models/pyannote_diarization_config.yaml
@@ -0,0 +1,13 @@
+pipeline:
+  params:
+    clustering: AgglomerativeClustering
+    embedding: /FULL/PATH/TO/SCRIPT/tldw/App_Function_Libraries/models/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin #models/pyannote_model_wespeaker-voxceleb-resnet34-LM.bin
+    segmentation: /FULL/PATH/TO/SCRIPT/tldw/App_Function_Libraries/models/pyannote_model_segmentation-3.0.bin #models/pyannote_model_segmentation-3.0.bin
+
+params:
+  segmentation:
+    min_duration_off: 0.0
+  clustering:
+    method: centroid
+    min_cluster_size: 12
+    threshold: 0.7045654963945799
\ No newline at end of file
diff --git a/App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py b/App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py
index e2fc127b6..73ed98fd4 100644
--- a/App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py
+++ b/App_Function_Libraries/Benchmarks_Evaluations/Confabulation_check.py
@@ -59,7 +59,8 @@ def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str,
     Overall Assessment: [Your overall assessment of the summary's quality]
     """
-
+    # FIXME - Add g_eval_model to config.txt
+    # g_eval_model = loaded_config[][]
     try:
         result = chat_api_call(
             api_name,
@@ -67,7 +68,11 @@ def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str,
             prompt,
             "",
             temp=temp,
-            system_message="You are a helpful AI assistant tasked with evaluating summaries."
+ system_message="You are a helpful AI assistant tasked with evaluating summaries.", + streaming=False, + minp=None, + maxp=None, + model=None ) except Exception as e: return detailed_api_error(api_name, e) diff --git a/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py b/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py index a6c7651d3..2367f875a 100644 --- a/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py +++ b/App_Function_Libraries/Benchmarks_Evaluations/ms_g_eval.py @@ -329,7 +329,7 @@ def geval_summarization( temp = 0.7 logging.info(f"Debug - geval_summarization Function - API Endpoint: {api_endpoint}") try: - response = chat_api_call(api_endpoint, api_key, prompt_with_src_and_gen, "", temp, system_message) + response = chat_api_call(api_endpoint, api_key, prompt_with_src_and_gen, "", temp, system_message, streaming=False, minp=None, maxp=None, model=None) except Exception as e: raise ValueError(f"Unsupported API endpoint: {api_endpoint}") except RetryError: diff --git a/App_Function_Libraries/Character_Chat/Character_Chat_Lib.py b/App_Function_Libraries/Character_Chat/Character_Chat_Lib.py index 4546b4344..c57c73c61 100644 --- a/App_Function_Libraries/Character_Chat/Character_Chat_Lib.py +++ b/App_Function_Libraries/Character_Chat/Character_Chat_Lib.py @@ -87,13 +87,25 @@ def extract_character_id(choice: str) -> int: """Extract the character ID from the dropdown selection string.""" log_counter("extract_character_id_attempt") try: - character_id = int(choice.split('(ID: ')[1].rstrip(')')) + logging.debug(f"Choice received: {choice}") # Debugging line + if not choice: + raise ValueError("No choice provided.") + + if '(ID: ' not in choice or ')' not in choice: + raise ValueError(f"Invalid choice format: {choice}") + + # Extract the ID part + id_part = choice.split('(ID: ')[1] + character_id = int(id_part.rstrip(')')) + + logging.debug(f"Extracted character ID: {character_id}") # Debugging line log_counter("extract_character_id_success") return character_id except Exception as e: log_counter("extract_character_id_error", labels={"error": str(e)}) raise + def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]: """Wrapper function to load character and image using the extracted ID.""" log_counter("load_character_wrapper_attempt") diff --git a/App_Function_Libraries/Chat/Chat_Functions.py b/App_Function_Libraries/Chat/Chat_Functions.py index 1bfd00f98..aeea31071 100644 --- a/App_Function_Libraries/Chat/Chat_Functions.py +++ b/App_Function_Libraries/Chat/Chat_Functions.py @@ -30,16 +30,23 @@ # Functions: def approximate_token_count(history): - total_text = '' - for user_msg, bot_msg in history: - if user_msg: - total_text += user_msg + ' ' - if bot_msg: - total_text += bot_msg + ' ' - total_tokens = len(total_text.split()) - return total_tokens - -def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message=None): + try: + total_text = '' + for user_msg, bot_msg in history: + if user_msg: + total_text += user_msg + ' ' + if bot_msg: + total_text += bot_msg + ' ' + total_tokens = len(total_text.split()) + return total_tokens + except Exception as e: + logging.error(f"Error calculating token count: {str(e)}") + return 0 + + +# FIXME - add model parameter +def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message, streaming, minp=None, maxp=None, model=None): + logging.info(f"Debug - Chat API Call - API Endpoint: {api_endpoint}") 
log_counter("chat_api_call_attempt", labels={"api_endpoint": api_endpoint}) start_time = time.time() if not api_key: @@ -50,18 +57,22 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag logging.info(f"Debug - Chat API Call - API Key: {api_key}") logging.info(f"Debug - Chat chat_api_call - API Endpoint: {api_endpoint}") if api_endpoint.lower() == 'openai': - response = chat_with_openai(api_key, input_data, prompt, temp, system_message) + response = chat_with_openai(api_key, input_data, prompt, temp, system_message, streaming, minp, maxp, model) elif api_endpoint.lower() == 'anthropic': # Retrieve the model from config loaded_config_data = load_and_log_configs() - model = loaded_config_data['models']['anthropic'] if loaded_config_data else None + if not model: + model = loaded_config_data['anthropic_api']['model'] response = chat_with_anthropic( api_key=api_key, input_data=input_data, model=model, custom_prompt_arg=prompt, - system_prompt=system_message + max_retries=3, + retry_delay=5, + system_prompt=system_message, + streaming=streaming, ) elif api_endpoint.lower() == "cohere": @@ -133,7 +144,7 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag def chat(message, history, media_content, selected_parts, api_endpoint, api_key, prompt, temperature, - system_message=None): + system_message=None, streaming=False, minp=None, maxp=None, model=None): log_counter("chat_attempt", labels={"api_endpoint": api_endpoint}) start_time = time.time() try: @@ -172,12 +183,15 @@ def chat(message, history, media_content, selected_parts, api_endpoint, api_key, logging.debug(f"Debug - Chat Function - Prompt: {prompt}") # Use the existing API request code based on the selected endpoint - response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message) + response = chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_message, streaming, minp=None, maxp=None, model=None) - chat_duration = time.time() - start_time - log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint}) - log_counter("chat_success", labels={"api_endpoint": api_endpoint}) - return response + if streaming: + return response + else: + chat_duration = time.time() - start_time + log_histogram("chat_duration", chat_duration, labels={"api_endpoint": api_endpoint}) + log_counter("chat_success", labels={"api_endpoint": api_endpoint}) + return response except Exception as e: log_counter("chat_error", labels={"api_endpoint": api_endpoint, "error": str(e)}) logging.error(f"Error in chat function: {str(e)}") @@ -374,7 +388,6 @@ def update_chat_content(selected_item, use_content, use_summary, use_prompt, ite CHARACTERS_FILE = Path('.', 'Helper_Scripts', 'Character_Cards', 'Characters.json') - def save_character(character_data): log_counter("save_character_attempt") start_time = time.time() @@ -435,7 +448,6 @@ def load_characters(): return {} - def get_character_names(): log_counter("get_character_names_attempt") start_time = time.time() diff --git a/App_Function_Libraries/Chat/Chat_Pipeline.py b/App_Function_Libraries/Chat/Chat_Pipeline.py new file mode 100644 index 000000000..d27ffa81d --- /dev/null +++ b/App_Function_Libraries/Chat/Chat_Pipeline.py @@ -0,0 +1,19 @@ +# Chat_Pipeline.py +# +# Description: This file contains functions related to the prompt modification pipeline, available as a means of +# complex prompt modification/replacement without modification of original intent/messaging. 
+# +# Imports +import os +# +# 3rd-party Libraries +# +# Local Imports +# +####################################################################################################################### +# +# Functions: + +# +# End of Chat_Pipeline.py +####################################################################################################################### diff --git a/App_Function_Libraries/DB/Character_Chat_DB.py b/App_Function_Libraries/DB/Character_Chat_DB.py index 63bbac637..93114ad93 100644 --- a/App_Function_Libraries/DB/Character_Chat_DB.py +++ b/App_Function_Libraries/DB/Character_Chat_DB.py @@ -355,17 +355,20 @@ def add_character_card(card_data: Dict[str, Any]) -> Optional[int]: def get_character_cards() -> List[Dict]: """Retrieve all character cards from the database.""" - logging.debug(f"Fetching characters from DB: {chat_DB_PATH}") - conn = sqlite3.connect(chat_DB_PATH) - cursor = conn.cursor() - cursor.execute("SELECT * FROM CharacterCards") - rows = cursor.fetchall() - columns = [description[0] for description in cursor.description] - conn.close() - characters = [dict(zip(columns, row)) for row in rows] - #logging.debug(f"Characters fetched from DB: {characters}") - return characters - + try: + logging.debug(f"Fetching characters from DB: {chat_DB_PATH}") + conn = sqlite3.connect(chat_DB_PATH) + cursor = conn.cursor() + cursor.execute("SELECT * FROM CharacterCards") + rows = cursor.fetchall() + columns = [description[0] for description in cursor.description] + conn.close() + characters = [dict(zip(columns, row)) for row in rows] + logging.debug(f"Characters fetched from DB: {characters}") + return characters + except Exception as e: + logging.error(f"Error fetching character cards: {e}") + return [] def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]: """ diff --git a/App_Function_Libraries/Gradio_Related.py b/App_Function_Libraries/Gradio_Related.py index 55e175749..8b82375c1 100644 --- a/App_Function_Libraries/Gradio_Related.py +++ b/App_Function_Libraries/Gradio_Related.py @@ -65,6 +65,7 @@ create_utilities_yt_video_tab from App_Function_Libraries.Gradio_UI.Video_transcription_tab import create_video_transcription_tab from App_Function_Libraries.Gradio_UI.View_tab import create_manage_items_tab +from App_Function_Libraries.Gradio_UI.WebSearch_tab import create_websearch_tab from App_Function_Libraries.Gradio_UI.Website_scraping_tab import create_website_scraping_tab from App_Function_Libraries.Gradio_UI.Workflows_tab import chat_workflows_tab from App_Function_Libraries.Gradio_UI.View_DB_Items_tab import create_view_all_mediadb_with_versions_tab, \ @@ -403,6 +404,10 @@ def launch_ui(share_public=None, server_mode=False): padding: 10px; margin-top: 10px; } + .scrollable-textbox textarea { + height: 600px !important; + overflow-y: auto !important; + } """ config = load_and_log_configs() @@ -464,6 +469,8 @@ def launch_ui(share_public=None, server_mode=False): create_chat_interface_four() chat_workflows_tab() + with gr.TabItem("Web Search & Review", id="websearch group", visible=True): + create_websearch_tab() with gr.TabItem("Character Chat", id="character chat group", visible=True): create_character_card_interaction_tab() create_character_chat_mgmt_tab() diff --git a/App_Function_Libraries/Gradio_UI/Character_Chat_tab.py b/App_Function_Libraries/Gradio_UI/Character_Chat_tab.py index 56ee24113..ebb498bc3 100644 --- a/App_Function_Libraries/Gradio_UI/Character_Chat_tab.py +++ 
b/App_Function_Libraries/Gradio_UI/Character_Chat_tab.py @@ -2,6 +2,7 @@ # Description: Library for character card import functions # # Imports +import os from datetime import datetime import re import tempfile @@ -34,6 +35,7 @@ delete_character_card, update_character_card, search_character_chats, save_chat_history_to_character_db, ) +from App_Function_Libraries.TTS.TTS_Providers import generate_audio, play_mp3, play_audio_file from App_Function_Libraries.Utils.Utils import sanitize_user_input, format_api_name, global_api_endpoints, \ default_api_endpoint, load_comprehensive_config @@ -266,895 +268,1081 @@ def create_character_card_interaction_tab(): logging.error(f"Error setting default API endpoint: {str(e)}") default_value = None with gr.TabItem("Chat with a Character Card", visible=True): - gr.Markdown("# Chat with a Character Card") - with gr.Row(): - with gr.Column(scale=1): - # Checkbox to Decide Whether to Save Chats by Default - config = load_comprehensive_config() - auto_save_value = config.get('auto-save', 'save_character_chats', fallback='False') - auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=auto_save_value) - chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True) - save_chat_history_to_db = gr.Button("Save Chat History to Database") - save_status = gr.Textbox(label="Status", interactive=False) - with gr.Column(scale=2): - gr.Markdown("## Search and Load Existing Chats") - chat_search_query = gr.Textbox( - label="Search Chats", - placeholder="Enter chat name or keywords to search" - ) - chat_search_button = gr.Button("Search Chats") - chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False) - load_chat_button = gr.Button("Load Selected Chat", visible=False) - - with gr.Row(): - with gr.Column(scale=1): - character_image = gr.Image(label="Character Image", type="pil") - character_card_upload = gr.File( - label="Upload Character Card (PNG, WEBP, JSON)", - file_types=[".png", ".webp", ".json"] - ) - import_card_button = gr.Button("Import Character Card") - load_characters_button = gr.Button("Load Existing Characters") - character_dropdown = gr.Dropdown(label="Select Character", choices=[]) - user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here") - # Refactored API selection dropdown - api_name_input = gr.Dropdown( - choices=["None"] + [format_api_name(api) for api in global_api_endpoints], - value=default_value, - label="API for Interaction (Mandatory)" - ) - api_key_input = gr.Textbox( - label="API Key (if not set in Config_Files/config.txt)", - placeholder="Enter your API key here", type="password" - ) - temperature_slider = gr.Slider( - minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature" - ) - chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True) - import_chat_button = gr.Button("Import Chat History") - - with gr.Column(scale=2): - chat_history = gr.Chatbot(label="Conversation", height=800) - user_input = gr.Textbox(label="Your message") - send_message_button = gr.Button("Send Message") - answer_for_me_button = gr.Button("Answer for Me") - continue_talking_button = gr.Button("Continue Talking") - regenerate_button = gr.Button("Regenerate Last Message") - token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) - clear_chat_button = gr.Button("Clear Chat") - save_snapshot_button = gr.Button("Save Chat Snapshot") - update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], 
visible=False) - load_selected_chat_button = gr.Button("Load Selected Chat", visible=False) - update_chat_button = gr.Button("Update Selected Chat", visible=False) - - # States - character_data = gr.State(None) - user_name = gr.State("") - selected_chat_id = gr.State(None) # To track the selected chat for updates - - # Callback Functions + with gr.Blocks() as interface: + gr.Markdown("# Chat with a Character Card") + with gr.Row(): + with gr.Column(scale=1): + # Checkbox to Decide Whether to Save Chats by Default + config = load_comprehensive_config() + auto_save_value = config.get('auto-save', 'save_character_chats', fallback='False') + auto_save_checkbox = gr.Checkbox(label="Save chats automatically", value=auto_save_value) + chat_media_name = gr.Textbox(label="Custom Chat Name (optional)", visible=True) + save_chat_history_to_db = gr.Button("Save Chat History to Database") + save_status = gr.Textbox(label="Status", interactive=False) + with gr.Column(scale=2): + gr.Markdown("## Search and Load Existing Chats") + chat_search_query = gr.Textbox( + label="Search Chats", + placeholder="Enter chat name or keywords to search" + ) + chat_search_button = gr.Button("Search Chats") + chat_search_dropdown = gr.Dropdown(label="Search Results", choices=[], visible=False) + load_chat_button = gr.Button("Load Selected Chat", visible=False) + + with gr.Row(): + with gr.Column(scale=1): + character_image = gr.Image(label="Character Image", type="pil") + character_card_upload = gr.File( + label="Upload Character Card (PNG, WEBP, JSON)", + file_types=[".png", ".webp", ".json"] + ) + import_card_button = gr.Button("Import Character Card") + load_characters_button = gr.Button("Load Existing Characters") + character_dropdown = gr.Dropdown(label="Select Character", choices=[]) + user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here") + # Refactored API selection dropdown + api_name_input = gr.Dropdown( + choices=["None"] + [format_api_name(api) for api in global_api_endpoints], + value=default_value, + label="API for Interaction (Mandatory)" + ) + api_key_input = gr.Textbox( + label="API Key (if not set in Config_Files/config.txt)", + placeholder="Enter your API key here", type="password" + ) + temperature_slider = gr.Slider( + minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature" + ) + with gr.Row(): + minp_slider = gr.Slider( + minimum=0.0, maximum=1.0, value=0.00, step=0.01, label="Min-P" + ) + with gr.Row(): + maxp_slider = gr.Slider( + minimum=0.00, maximum=1.0, value=1.00, step=0.01, label="Top-P" + ) + chat_file_upload = gr.File(label="Upload Chat History JSON", visible=True) + import_chat_button = gr.Button("Import Chat History") + + with gr.Column(scale=2): + chat_history = gr.Chatbot(label="Conversation", height=800) + user_input = gr.Textbox(label="Your message") + with gr.Row(): + streaming = gr.Checkbox(label="Enable streaming", value=True, interactive=True) + auto_speak_checkbox = gr.Checkbox(label="Auto-speak response", value=False, interactive=True) + send_message_button = gr.Button("Send Message") + with gr.Row(): + speak_button = gr.Button("Speak Response") + tts_status = gr.Textbox(label="TTS Status", interactive=False) + with gr.Row(): + answer_for_me_button = gr.Button("Answer for Me") + continue_talking_button = gr.Button("Continue Talking") + regenerate_button = gr.Button("Regenerate Last Message") + with gr.Row(): + token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) + clear_chat_button = 
gr.Button("Clear Chat") + save_snapshot_button = gr.Button("Save Chat Snapshot") + update_chat_dropdown = gr.Dropdown(label="Select Chat to Update", choices=[], visible=False) + load_selected_chat_button = gr.Button("Load Selected Chat", visible=False) + update_chat_button = gr.Button("Update Selected Chat", visible=False) + + # States + character_data = gr.State(None) + user_name = gr.State("") + selected_chat_id = gr.State(None) # To track the selected chat for updates + + # Callback Functions + + def search_existing_chats(query): + logging.info(f"Searching for chats with query: {query}") + results, message = search_character_chats(query) + if results: + # Format search results for dropdown + formatted_results = [ + f"{chat['conversation_name']} (ID: {chat['id']})" for chat in results + ] + else: + formatted_results = [] + return formatted_results, message - def search_existing_chats(query): - results, message = search_character_chats(query) - if results: - # Format search results for dropdown - formatted_results = [ - f"{chat['conversation_name']} (ID: {chat['id']})" for chat in results - ] - else: - formatted_results = [] - return formatted_results, message + def load_selected_chat_from_search(selected_chat, user_name): + logging.info(f"Loading selected chat: {selected_chat}") + if not selected_chat: + return None, [], None, "No chat selected." - def load_selected_chat_from_search(selected_chat, user_name): - if not selected_chat: - return None, [], None, "No chat selected." + try: + chat_id_match = re.search(r'\(ID:\s*(\d+)\)', selected_chat) + if not chat_id_match: + return None, [], None, "Invalid chat selection format." - try: - chat_id_match = re.search(r'\(ID:\s*(\d+)\)', selected_chat) - if not chat_id_match: - return None, [], None, "Invalid chat selection format." + chat_id = int(chat_id_match.group(1)) - chat_id = int(chat_id_match.group(1)) + # Use the new function to load chat and character data + char_data, chat_history, img = load_chat_and_character(chat_id, user_name) - # Use the new function to load chat and character data - char_data, chat_history, img = load_chat_and_character(chat_id, user_name) + if not char_data: + return None, [], None, "Failed to load character data for the selected chat." - if not char_data: - return None, [], None, "Failed to load character data for the selected chat." + return char_data, chat_history, img, f"Chat '{selected_chat}' loaded successfully." + except Exception as e: + logging.error(f"Error loading selected chat: {e}") + return None, [], None, f"Error loading chat: {e}" - return char_data, chat_history, img, f"Chat '{selected_chat}' loaded successfully." - except Exception as e: - logging.error(f"Error loading selected chat: {e}") - return None, [], None, f"Error loading chat: {e}" + def import_chat_history(file, current_history, char_data, user_name_val): + """ + Imports chat history from a file, replacing '{{user}}' with the actual user name. - def import_chat_history(file, current_history, char_data, user_name_val): - """ - Imports chat history from a file, replacing '{{user}}' with the actual user name. + Args: + file (file): The uploaded chat history file. + current_history (list): The current chat history. + char_data (dict): The current character data. + user_name_val (str): The user's name. - Args: - file (file): The uploaded chat history file. - current_history (list): The current chat history. - char_data (dict): The current character data. - user_name_val (str): The user's name. 
+ Returns: + tuple: Updated chat history, updated character data, and a status message. + """ + logging.info(f"Importing chat history from file: {file.name}") + loaded_history, char_name = load_chat_history(file) + if loaded_history is None: + return current_history, char_data, "Failed to load chat history." - Returns: - tuple: Updated chat history, updated character data, and a status message. - """ - loaded_history, char_name = load_chat_history(file) - if loaded_history is None: - return current_history, char_data, "Failed to load chat history." - - # Replace '{{user}}' in the loaded chat history - loaded_history = replace_user_placeholder(loaded_history, user_name_val) - - # Check if the loaded chat is for the current character - if char_data and char_data.get('name') != char_name: - return current_history, char_data, ( - f"Warning: Loaded chat is for character '{char_name}', " - f"but current character is '{char_data.get('name')}'. Chat not imported." - ) + # Replace '{{user}}' in the loaded chat history + loaded_history = replace_user_placeholder(loaded_history, user_name_val) - # If no character is selected, try to load the character from the chat - if not char_data: - characters = get_character_cards() - character = next((char for char in characters if char['name'] == char_name), None) - if character: - char_data = character - # Replace '{{user}}' in the first_message if necessary - if character.get('first_message'): - character['first_message'] = character['first_message'].replace("{{user}}", - user_name_val if user_name_val else "User") - else: + # Check if the loaded chat is for the current character + if char_data and char_data.get('name') != char_name: return current_history, char_data, ( - f"Warning: Character '{char_name}' not found. Please select the character manually." + f"Warning: Loaded chat is for character '{char_name}', " + f"but current character is '{char_data.get('name')}'. Chat not imported." ) - return loaded_history, char_data, f"Chat history for '{char_name}' imported successfully." - - def load_character(name): - characters = get_character_cards() - character = next((char for char in characters if char['name'] == name), None) - if character: - first_message = character.get('first_message', "Hello! I'm ready to chat.") - return character, [(None, first_message)] if first_message else [], None - return None, [], None - - def load_character_image(name): - character = next((char for char in get_character_cards() if char['name'] == name), None) - if character and 'image' in character and character['image']: - try: - # Decode the base64 image - image_data = base64.b64decode(character['image']) - # Load as PIL Image - img = Image.open(io.BytesIO(image_data)).convert("RGBA") - return img - except Exception as e: - logging.error(f"Error loading image for character '{name}': {e}") - return None - return None - - def character_chat_wrapper( - message, history, char_data, api_endpoint, api_key, - temperature, user_name_val, auto_save - ): - if not char_data: - return history, "Please select a character first." - - user_name_val = user_name_val or "User" - char_name = char_data.get('name', 'AI Assistant') - - # Prepare the character's background information - char_background = f""" - Name: {char_name} - Description: {char_data.get('description', 'N/A')} - Personality: {char_data.get('personality', 'N/A')} - Scenario: {char_data.get('scenario', 'N/A')} - """ - - # Prepare the system prompt - system_message = f"""You are roleplaying as {char_name}. 
{char_data.get('system_prompt', '')}""" - - # Prepare chat context - media_content = { - 'id': char_name, - 'title': char_name, - 'content': char_background, - 'description': char_data.get('description', ''), - 'personality': char_data.get('personality', ''), - 'scenario': char_data.get('scenario', '') - } - selected_parts = ['description', 'personality', 'scenario'] - - prompt = char_data.get('post_history_instructions', '') - - # Sanitize and format user message - user_message = sanitize_user_input(message) - user_message = replace_placeholders(user_message, char_name, user_name_val) - full_message = f"{user_name_val}: {user_message}" - - # Generate bot response - bot_message = chat( - full_message, - history, - media_content, - selected_parts, - api_endpoint, - api_key, - prompt, - temperature, - system_message - ) - - # Replace placeholders in bot message - bot_message = replace_placeholders(bot_message, char_name, user_name_val) - - # Update history - history.append((user_message, bot_message)) + # If no character is selected, try to load the character from the chat + if not char_data: + characters = get_character_cards() + character = next((char for char in characters if char['name'] == char_name), None) + if character: + char_data = character + # Replace '{{user}}' in the first_message if necessary + if character.get('first_message'): + character['first_message'] = character['first_message'].replace("{{user}}", + user_name_val if user_name_val else "User") + else: + return current_history, char_data, ( + f"Warning: Character '{char_name}' not found. Please select the character manually." + ) + + return loaded_history, char_data, f"Chat history for '{char_name}' imported successfully." + + def character_chat_wrapper( + message, history, char_data, api_endpoint, api_key, + temperature, user_name_val, auto_save, streaming, minp, maxp + ): + if not char_data: + return history, "Please select a character first." + + # Sanitize the initial history to ensure no None values + sanitized_history = [] + for entry in history: + if entry is None: + sanitized_history.append(("", "")) # Replace None with an empty tuple + elif isinstance(entry, (list, tuple)) and len(entry) == 2: + # Ensure both elements are strings + user_msg = entry[0] if entry[0] is not None else "" + bot_msg = entry[1] if entry[1] is not None else "" + sanitized_history.append((user_msg, bot_msg)) + else: + # If the entry is invalid, replace it with an empty tuple + sanitized_history.append(("", "")) + history = sanitized_history + + user_name_val = user_name_val or "User" + char_name = char_data.get('name', 'AI Assistant') + + # Prepare the character's background information + char_background = f""" + Name: {char_name} + Description: {char_data.get('description', 'N/A')} + Personality: {char_data.get('personality', 'N/A')} + Scenario: {char_data.get('scenario', 'N/A')} + """ + + # Prepare the system prompt + system_message = f"""You are roleplaying as {char_name}. 
{char_data.get('system_prompt', '')}""" + + # Prepare chat context + media_content = { + 'id': char_name, + 'title': char_name, + 'content': char_background, + 'description': char_data.get('description', ''), + 'personality': char_data.get('personality', ''), + 'scenario': char_data.get('scenario', '') + } + selected_parts = ['description', 'personality', 'scenario'] + + prompt = char_data.get('post_history_instructions', '') + + # Sanitize and format user message + user_message = sanitize_user_input(message) + user_message = replace_placeholders(user_message, char_name, user_name_val) + full_message = f"{user_name_val}: {user_message}" + + # Generate bot response + logging.debug(f"Generating response; Calling chat function with message: {full_message}") + bot_message = chat( + full_message, + history, + media_content, + selected_parts, + api_endpoint, + api_key, + prompt, + temperature, + system_message, + streaming, + minp=minp, + maxp=maxp + ) - # Auto-save if enabled - save_status = "" - if auto_save: - character_id = char_data.get('id') - if character_id: - conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - add_character_chat(character_id, conversation_name, history) - save_status = "Chat auto-saved." + # Handle streaming response + if streaming: + history.append((user_message, "")) # Append user message with an empty bot response + full_response = "" + for chunk in bot_message: + full_response += chunk + history[-1] = (user_message, full_response) + yield history, "" # Yield updated history and empty status + + # After streaming is complete, handle auto-save + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." + yield history, save_status else: - save_status = "Character ID not found; chat not saved." + # For non-streaming, append the full bot response to the history + bot_message = replace_placeholders(bot_message, char_name, user_name_val) + history.append((user_message, bot_message)) + logging.debug(f"Updated history (non-streaming): {history}") + + # Auto-save if enabled + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." + + return history, save_status + + def validate_chat_history(chat_history: List[Tuple[Optional[str], str]]) -> bool: + """ + Validate the chat history format and content. + + Args: + chat_history: List of message tuples (user_message, bot_message) + + Returns: + bool: True if valid, False if invalid + """ + if not isinstance(chat_history, list): + return False - return history, save_status + for entry in chat_history: + if not isinstance(entry, tuple) or len(entry) != 2: + return False + # First element can be None (for system messages) or str + if not (entry[0] is None or isinstance(entry[0], str)): + return False + # Second element (bot response) must be str and not empty + if not isinstance(entry[1], str) or not entry[1].strip(): + return False + + return True + + def sanitize_conversation_name(name: str) -> str: + """ + Sanitize the conversation name. 
+ + Args: + name: Raw conversation name + + Returns: + str: Sanitized conversation name + """ + # Remove any non-alphanumeric characters except spaces and basic punctuation + sanitized = re.sub(r'[^a-zA-Z0-9\s\-_.]', '', name) + # Limit length + sanitized = sanitized[:100] + # Ensure it's not empty + if not sanitized.strip(): + sanitized = f"Chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + return sanitized + + def save_chat_history_to_db_wrapper( + chat_history: List[Tuple[Optional[str], str]], + conversation_id: str, + media_content: Dict, + chat_media_name: str, + char_data: Dict, + auto_save: bool + ) -> Tuple[str, str]: + """ + Save chat history to the database with validation. + + Args: + chat_history: List of message tuples + conversation_id: Current conversation ID + media_content: Media content metadata + chat_media_name: Custom name for the chat + char_data: Character data dictionary + auto_save: Auto-save flag + + Returns: + Tuple[str, str]: (status message, detail message) + """ + try: + # Basic input validation + if not chat_history: + return "No chat history to save.", "" - def validate_chat_history(chat_history: List[Tuple[Optional[str], str]]) -> bool: - """ - Validate the chat history format and content. + if not validate_chat_history(chat_history): + return "Invalid chat history format.", "Please ensure the chat history is valid." - Args: - chat_history: List of message tuples (user_message, bot_message) + if not char_data: + return "No character selected.", "Please select a character first." - Returns: - bool: True if valid, False if invalid - """ - if not isinstance(chat_history, list): - return False + character_id = char_data.get('id') + if not character_id: + return "Invalid character data: No character ID found.", "" - for entry in chat_history: - if not isinstance(entry, tuple) or len(entry) != 2: - return False - # First element can be None (for system messages) or str - if not (entry[0] is None or isinstance(entry[0], str)): - return False - # Second element (bot response) must be str and not empty - if not isinstance(entry[1], str) or not entry[1].strip(): - return False - - return True - - def sanitize_conversation_name(name: str) -> str: - """ - Sanitize the conversation name. + # Sanitize and prepare conversation name + conversation_name = sanitize_conversation_name( + chat_media_name if chat_media_name.strip() + else f"Chat with {char_data.get('name', 'Unknown')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + ) - Args: - name: Raw conversation name + # Save to the database using your existing function + chat_id = save_chat_history_to_character_db( + character_id=character_id, + conversation_name=conversation_name, + chat_history=chat_history + ) - Returns: - str: Sanitized conversation name - """ - # Remove any non-alphanumeric characters except spaces and basic punctuation - sanitized = re.sub(r'[^a-zA-Z0-9\s\-_.]', '', name) - # Limit length - sanitized = sanitized[:100] - # Ensure it's not empty - if not sanitized.strip(): - sanitized = f"Chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}" - return sanitized - - def save_chat_history_to_db_wrapper( - chat_history: List[Tuple[Optional[str], str]], - conversation_id: str, - media_content: Dict, - chat_media_name: str, - char_data: Dict, - auto_save: bool - ) -> Tuple[str, str]: - """ - Save chat history to the database with validation. 
+ if chat_id: + success_message = ( + f"Chat saved successfully!\n" + f"ID: {chat_id}\n" + f"Name: {conversation_name}\n" + f"Messages: {len(chat_history)}" + ) + return success_message, "" + else: + return "Failed to save chat to database.", "Database operation failed." - Args: - chat_history: List of message tuples - conversation_id: Current conversation ID - media_content: Media content metadata - chat_media_name: Custom name for the chat - char_data: Character data dictionary - auto_save: Auto-save flag + except Exception as e: + logging.error(f"Error saving chat history: {str(e)}", exc_info=True) + return f"Error saving chat: {str(e)}", "Please check the logs for more details." + + def update_character_info(name): + return load_character_and_image(name, user_name.value) + + def on_character_select(name, user_name_val): + logging.debug(f"Character selected: {name}") + char_data, chat_history, img = load_character_and_image(name, user_name_val) + return char_data, chat_history, img + + def clear_chat_history(char_data, user_name_val): + """ + Clears the chat history and initializes it with the character's first message, + replacing the '{{user}}' placeholder with the actual user name. + + Args: + char_data (dict): The current character data. + user_name_val (str): The user's name. + + Returns: + tuple: Updated chat history and the unchanged char_data. + """ + if char_data and 'first_message' in char_data and char_data['first_message']: + # Replace '{{user}}' in the first_message + first_message = char_data['first_message'].replace("{{user}}", + user_name_val if user_name_val else "User") + # Initialize chat history with the updated first_message + return [(None, first_message)], char_data + else: + # If no first_message is defined, simply clear the chat + return [], char_data + + def regenerate_last_message( + history, char_data, api_endpoint, api_key, + temperature, user_name_val, auto_save, streaming_checkbox, minp, maxp): + """ + Regenerates the last bot message by removing it and resending the corresponding user message. + + Args: + history (list): The current chat history as a list of tuples (user_message, bot_message). + char_data (dict): The current character data. + api_endpoint (str): The API endpoint to use for the LLM. + api_key (str): The API key for authentication. + temperature (float): The temperature setting for the LLM. + user_name_val (str): The user's name. + auto_save (bool): Flag indicating whether to auto-save the chat. + + Returns: + tuple: Updated chat history and a save status message. + """ + try: + streaming = streaming_checkbox.value if hasattr(streaming_checkbox, 'value') else streaming_checkbox + if not history: + return history, "No messages to regenerate." + + last_entry = history[-1] + last_user_message, last_bot_message = last_entry + + # Check if the last bot message exists + if last_bot_message is None: + return history, "The last message is not from the bot." + + # Remove the last bot message + new_history = history[:-1] + + # Resend the last user message to generate a new bot response + if not last_user_message: + return new_history, "No user message to regenerate the bot response." 
+ + # Prepare the character's background information + char_name = char_data.get('name', 'AI Assistant') + char_background = f""" + Name: {char_name} + Description: {char_data.get('description', 'N/A')} + Personality: {char_data.get('personality', 'N/A')} + Scenario: {char_data.get('scenario', 'N/A')} + """ + + # Prepare the system prompt for character impersonation + system_message = f"""You are roleplaying as {char_name}, the character described below. Respond to the user's messages in character, maintaining the personality and background provided. Do not break character or refer to yourself as an AI. Always refer to yourself as "{char_name}" and refer to the user as "{user_name_val}". + + {char_background} + + Additional instructions: {char_data.get('post_history_instructions', '')} + """ + + # Prepare media_content and selected_parts + media_content = { + 'id': char_name, + 'title': char_name, + 'content': char_background, + 'description': char_data.get('description', ''), + 'personality': char_data.get('personality', ''), + 'scenario': char_data.get('scenario', '') + } + selected_parts = ['description', 'personality', 'scenario'] + + prompt = char_data.get('post_history_instructions', '') + + # Prepare the input for the chat function + full_message = f"{user_name_val}: {last_user_message}" if last_user_message else f"{user_name_val}: " + + # Call the chat function to get a new bot message + bot_message = chat( + full_message, + new_history, + media_content, + selected_parts, + api_endpoint, + api_key, + prompt, + temperature, + system_message, + streaming, + minp=minp, + maxp=maxp + ) - Returns: - Tuple[str, str]: (status message, detail message) - """ - try: - # Basic input validation - if not chat_history: - return "No chat history to save.", "" + # Handle streaming response + if streaming: + full_response = "" + for chunk in bot_message: + full_response += chunk + yield new_history + [(last_user_message, full_response)], "" # Yield updated history and empty status + + # After streaming is complete, update history and handle auto-save + new_history.append((last_user_message, full_response)) + else: + # Replace placeholders in bot message + full_response = replace_placeholders(bot_message, char_name, user_name_val) + # Update history + new_history.append((last_user_message, full_response)) + + # Auto-save if enabled + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, new_history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." + + return new_history, save_status - if not validate_chat_history(chat_history): - return "Invalid chat history format.", "Please ensure the chat history is valid." + except Exception as e: + save_status = f"Error regenerating message: {str(e)}" + return history, save_status # Return original history if an error occurs - if not char_data: - return "No character selected.", "Please select a character first." + def save_untracked_chat_action(history, char_data): + if not char_data or not history: + return "No chat to save or character not selected." 
character_id = char_data.get('id') if not character_id: - return "Invalid character data: No character ID found.", "" - - # Sanitize and prepare conversation name - conversation_name = sanitize_conversation_name( - chat_media_name if chat_media_name.strip() - else f"Chat with {char_data.get('name', 'Unknown')} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - ) - - # Save to the database using your existing function - chat_id = save_chat_history_to_character_db( - character_id=character_id, - conversation_name=conversation_name, - chat_history=chat_history - ) + return "Character ID not found." + conversation_name = f"Snapshot {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + chat_id = add_character_chat(character_id, conversation_name, history, is_snapshot=True) if chat_id: - success_message = ( - f"Chat saved successfully!\n" - f"ID: {chat_id}\n" - f"Name: {conversation_name}\n" - f"Messages: {len(chat_history)}" - ) - return success_message, "" + return f"Chat snapshot saved successfully with ID {chat_id}." else: - return "Failed to save chat to database.", "Database operation failed." - - except Exception as e: - logging.error(f"Error saving chat history: {str(e)}", exc_info=True) - return f"Error saving chat: {str(e)}", "Please check the logs for more details." - - def update_character_info(name): - return load_character_and_image(name, user_name.value) - - def on_character_select(name, user_name_val): - logging.debug(f"Character selected: {name}") - char_data, chat_history, img = load_character_and_image(name, user_name_val) - return char_data, chat_history, img - - def clear_chat_history(char_data, user_name_val): - """ - Clears the chat history and initializes it with the character's first message, - replacing the '{{user}}' placeholder with the actual user name. - - Args: - char_data (dict): The current character data. - user_name_val (str): The user's name. - - Returns: - tuple: Updated chat history and the unchanged char_data. - """ - if char_data and 'first_message' in char_data and char_data['first_message']: - # Replace '{{user}}' in the first_message - first_message = char_data['first_message'].replace("{{user}}", - user_name_val if user_name_val else "User") - # Initialize chat history with the updated first_message - return [(None, first_message)], char_data - else: - # If no first_message is defined, simply clear the chat - return [], char_data - - def regenerate_last_message( - history, char_data, api_endpoint, api_key, - temperature, user_name_val, auto_save - ): - """ - Regenerates the last bot message by removing it and resending the corresponding user message. - - Args: - history (list): The current chat history as a list of tuples (user_message, bot_message). - char_data (dict): The current character data. - api_endpoint (str): The API endpoint to use for the LLM. - api_key (str): The API key for authentication. - temperature (float): The temperature setting for the LLM. - user_name_val (str): The user's name. - auto_save (bool): Flag indicating whether to auto-save the chat. - - Returns: - tuple: Updated chat history and a save status message. - """ - if not history: - return history, "No messages to regenerate." - - last_entry = history[-1] - last_user_message, last_bot_message = last_entry - - # Check if the last bot message exists - if last_bot_message is None: - return history, "The last message is not from the bot." 
- - # Remove the last bot message - new_history = history[:-1] - - # Resend the last user message to generate a new bot response - if not last_user_message: - return new_history, "No user message to regenerate the bot response." - - # Prepare the character's background information - char_name = char_data.get('name', 'AI Assistant') - char_background = f""" - Name: {char_name} - Description: {char_data.get('description', 'N/A')} - Personality: {char_data.get('personality', 'N/A')} - Scenario: {char_data.get('scenario', 'N/A')} - """ - - # Prepare the system prompt for character impersonation - system_message = f"""You are roleplaying as {char_name}, the character described below. Respond to the user's messages in character, maintaining the personality and background provided. Do not break character or refer to yourself as an AI. Always refer to yourself as "{char_name}" and refer to the user as "{user_name_val}". - - {char_background} - - Additional instructions: {char_data.get('post_history_instructions', '')} - """ - - # Prepare media_content and selected_parts - media_content = { - 'id': char_name, - 'title': char_name, - 'content': char_background, - 'description': char_data.get('description', ''), - 'personality': char_data.get('personality', ''), - 'scenario': char_data.get('scenario', '') - } - selected_parts = ['description', 'personality', 'scenario'] - - prompt = char_data.get('post_history_instructions', '') - - # Prepare the input for the chat function - full_message = f"{user_name_val}: {last_user_message}" if last_user_message else f"{user_name_val}: " - - # Call the chat function to get a new bot message - bot_message = chat( - full_message, - new_history, - media_content, - selected_parts, - api_endpoint, - api_key, - prompt, - temperature, - system_message - ) + return "Failed to save chat snapshot." + + def select_chat_for_update(): + # Fetch all chats for the selected character + if character_data.value: + character_id = character_data.value.get('id') + if character_id: + chats = get_character_chats(character_id) + chat_choices = [ + f"{chat['conversation_name']} (ID: {chat['id']})" for chat in chats + ] + return gr.update(choices=chat_choices), None + return gr.update(choices=[]), "No character selected." + + def load_selected_chat(chat_selection): + if not chat_selection: + return [], "No chat selected." - # Append the new bot message to the history - new_history.append((last_user_message, bot_message)) + try: + chat_id = int(chat_selection.split('(ID: ')[1].rstrip(')')) + chat = get_character_chat_by_id(chat_id) + if chat: + history = chat['chat_history'] + selected_chat_id.value = chat_id # Update the selected_chat_id state + return history, f"Loaded chat '{chat['conversation_name']}' successfully." + else: + return [], "Chat not found." + except Exception as e: + logging.error(f"Error loading selected chat: {e}") + return [], f"Error loading chat: {e}" - # Auto-save if enabled - if auto_save: - character_id = char_data.get('id') - if character_id: - conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - add_character_chat(character_id, conversation_name, new_history) - save_status = "Chat auto-saved." + def update_chat(chat_id, updated_history): + success = update_character_chat(chat_id, updated_history) + if success: + return "Chat updated successfully." else: - save_status = "Character ID not found; chat not saved." 
- else: - save_status = "" - - return new_history, save_status - - def toggle_chat_file_upload(): - return gr.update(visible=True) - - def save_untracked_chat_action(history, char_data): - if not char_data or not history: - return "No chat to save or character not selected." - - character_id = char_data.get('id') - if not character_id: - return "Character ID not found." - - conversation_name = f"Snapshot {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - chat_id = add_character_chat(character_id, conversation_name, history, is_snapshot=True) - if chat_id: - return f"Chat snapshot saved successfully with ID {chat_id}." - else: - return "Failed to save chat snapshot." - - def select_chat_for_update(): - # Fetch all chats for the selected character - if character_data.value: - character_id = character_data.value.get('id') - if character_id: - chats = get_character_chats(character_id) - chat_choices = [ - f"{chat['conversation_name']} (ID: {chat['id']})" for chat in chats - ] - return gr.update(choices=chat_choices), None - return gr.update(choices=[]), "No character selected." + return "Failed to update chat." + + def continue_talking( + history, char_data, api_endpoint, api_key, + temperature, user_name_val, auto_save, streaming_checkbox, minp, maxp + ): + """ + Causes the character to continue the conversation or think out loud. + """ + streaming = streaming_checkbox.value if hasattr(streaming_checkbox, 'value') else streaming_checkbox + if not char_data: + return history, "Please select a character first." + + user_name_val = user_name_val or "User" + char_name = char_data.get('name', 'AI Assistant') + + # Prepare the character's background information + char_background = f""" + Name: {char_name} + Description: {char_data.get('description', 'N/A')} + Personality: {char_data.get('personality', 'N/A')} + Scenario: {char_data.get('scenario', 'N/A')} + """ + + # Prepare the system prompt + system_message = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')} + If the user does not respond, continue expressing your thoughts or continue the conversation by thinking out loud. If thinking out loud, prefix the message with "Thinking: ".""" + + # Prepare chat context + media_content = { + 'id': char_name, + 'title': char_name, + 'content': char_background, + 'description': char_data.get('description', ''), + 'personality': char_data.get('personality', ''), + 'scenario': char_data.get('scenario', '') + } + selected_parts = ['description', 'personality', 'scenario'] + + prompt = char_data.get('post_history_instructions', '') + + # Simulate empty user input + user_message = "" + + # Generate bot response + bot_message = chat( + user_message, + history, + media_content, + selected_parts, + api_endpoint, + api_key, + prompt, + temperature, + system_message, + streaming, + minp=minp, + maxp=maxp + ) - def load_selected_chat(chat_selection): - if not chat_selection: - return [], "No chat selected." + # Handle streaming response + if streaming: + history.append((None, "")) # Append empty user message with an empty bot response + full_response = "" + for chunk in bot_message: + full_response += chunk + yield history + [(None, full_response)], "" # Yield updated history and empty status - try: - chat_id = int(chat_selection.split('(ID: ')[1].rstrip(')')) - chat = get_character_chat_by_id(chat_id) - if chat: - history = chat['chat_history'] - selected_chat_id.value = chat_id # Update the selected_chat_id state - return history, f"Loaded chat '{chat['conversation_name']}' successfully." 
+ # After streaming is complete, update history and handle auto-save + history.append((None, full_response)) else: - return [], "Chat not found." - except Exception as e: - logging.error(f"Error loading selected chat: {e}") - return [], f"Error loading chat: {e}" - - def update_chat(chat_id, updated_history): - success = update_character_chat(chat_id, updated_history) - if success: - return "Chat updated successfully." - else: - return "Failed to update chat." + # Replace placeholders in bot message + full_response = replace_placeholders(bot_message, char_name, user_name_val) - def continue_talking( - history, char_data, api_endpoint, api_key, - temperature, user_name_val, auto_save - ): - """ - Causes the character to continue the conversation or think out loud. - """ - if not char_data: - return history, "Please select a character first." - - user_name_val = user_name_val or "User" - char_name = char_data.get('name', 'AI Assistant') - - # Prepare the character's background information - char_background = f""" - Name: {char_name} - Description: {char_data.get('description', 'N/A')} - Personality: {char_data.get('personality', 'N/A')} - Scenario: {char_data.get('scenario', 'N/A')} - """ - - # Prepare the system prompt - system_message = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')} - If the user does not respond, continue expressing your thoughts or continue the conversation by thinking out loud. If thinking out loud, prefix the message with "Thinking: ".""" - - # Prepare chat context - media_content = { - 'id': char_name, - 'title': char_name, - 'content': char_background, - 'description': char_data.get('description', ''), - 'personality': char_data.get('personality', ''), - 'scenario': char_data.get('scenario', '') - } - selected_parts = ['description', 'personality', 'scenario'] - - prompt = char_data.get('post_history_instructions', '') - - # Simulate empty user input - user_message = "" - - # Generate bot response - bot_message = chat( - user_message, - history, - media_content, - selected_parts, - api_endpoint, - api_key, - prompt, - temperature, - system_message - ) + # Update history + history.append((None, full_response)) - # Replace placeholders in bot message - bot_message = replace_placeholders(bot_message, char_name, user_name_val) + # Auto-save if enabled + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." + + return history, save_status + + def answer_for_me( + history, char_data, api_endpoint, api_key, + temperature, user_name_val, auto_save, streaming_checkbox, minp, maxp): + """ + Generates a likely user response and continues the conversation. + """ + streaming = streaming_checkbox.value if hasattr(streaming_checkbox, 'value') else streaming_checkbox + if not char_data: + return history, "Please select a character first." 
+ + user_name_val = user_name_val or "User" + char_name = char_data.get('name', 'AI Assistant') + + # Prepare the character's background information + char_background = f""" + Name: {char_name} + Description: {char_data.get('description', 'N/A')} + Personality: {char_data.get('personality', 'N/A')} + Scenario: {char_data.get('scenario', 'N/A')} + """ + + # Prepare system message for generating user's response + system_message_user = f"""You are simulating the user {user_name_val}. Based on the conversation so far, generate a natural and appropriate response that {user_name_val} might say next. The response should fit the context and flow of the conversation. ONLY SPEAK FOR {user_name_val}.""" + + # Prepare chat context + media_content = { + 'id': char_name, + 'title': char_name, + 'content': char_background, + 'description': char_data.get('description', ''), + 'personality': char_data.get('personality', ''), + 'scenario': char_data.get('scenario', '') + } + selected_parts = ['description', 'personality', 'scenario'] + + # Generate user response + user_response = chat( + "", # No new message + history, + media_content, + selected_parts, + api_endpoint, + api_key, + prompt="", + temperature=temperature, + system_message=system_message_user, + streaming=streaming, + minp=minp, + maxp=maxp + ) - # Update history - history.append((None, bot_message)) + if streaming: + history.append(("", "")) # Append empty user message + full_user_response = "" + for chunk in user_response: + full_user_response += chunk + history[-1] = (full_user_response, "") + yield history, "" # Yield updated history and empty status + + # Now generate the character's response to this user response + system_message_bot = f"""You are roleplaying as {char_name}. {char_data.get('system_prompt', '')}""" + + bot_message = chat( + f"{user_name_val}: {full_user_response}", + history[:-1], + media_content, + selected_parts, + api_endpoint, + api_key, + prompt=char_data.get('post_history_instructions', ''), + temperature=temperature, + system_message=system_message_bot, + streaming=streaming, + minp=minp, + maxp=maxp + ) - # Auto-save if enabled - save_status = "" - if auto_save: - character_id = char_data.get('id') - if character_id: - conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - add_character_chat(character_id, conversation_name, history) - save_status = "Chat auto-saved." + full_bot_response = "" + for chunk in bot_message: + full_bot_response += chunk + history[-1] = (full_user_response, full_bot_response) + yield history, "" # Yield updated history and empty status + + # After streaming is complete, handle auto-save + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." + yield history, save_status else: - save_status = "Character ID not found; chat not saved." + # Append the generated user response to history + history.append((user_response, None)) + + # Now generate the character's response to this user response + # Prepare the system message for the character + system_message_bot = f"""You are roleplaying as {char_name}. 
{char_data.get('system_prompt', '')}""" + + bot_message = chat( + f"{user_name_val}: {user_response}", + history[:-1], + media_content, + selected_parts, + api_endpoint, + api_key, + prompt=char_data.get('post_history_instructions', ''), + temperature=temperature, + system_message=system_message_bot, + streaming=streaming, + minp=minp, + maxp=maxp + ) - return history, save_status + # Replace placeholders in bot message + bot_message = replace_placeholders(bot_message, char_name, user_name_val) - def answer_for_me( - history, char_data, api_endpoint, api_key, - temperature, user_name_val, auto_save - ): - """ - Generates a likely user response and continues the conversation. - """ - if not char_data: - return history, "Please select a character first." - - user_name_val = user_name_val or "User" - char_name = char_data.get('name', 'AI Assistant') - - # Prepare the character's background information - char_background = f""" - Name: {char_name} - Description: {char_data.get('description', 'N/A')} - Personality: {char_data.get('personality', 'N/A')} - Scenario: {char_data.get('scenario', 'N/A')} - """ + # Update history with bot's response + history[-1] = (user_response, bot_message) - # Prepare system message for generating user's response - system_message_user = f"""You are simulating the user {user_name_val}. Based on the conversation so far, generate a natural and appropriate response that {user_name_val} might say next. The response should fit the context and flow of the conversation. ONLY SPEAK FOR {user_name_val}.""" - - # Prepare chat context - media_content = { - 'id': char_name, - 'title': char_name, - 'content': char_background, - 'description': char_data.get('description', ''), - 'personality': char_data.get('personality', ''), - 'scenario': char_data.get('scenario', '') - } - selected_parts = ['description', 'personality', 'scenario'] - - # Generate user response - user_response = chat( - "", # No new message - history, - media_content, - selected_parts, - api_endpoint, - api_key, - prompt="", - temperature=temperature, - system_message=system_message_user - ) + # Auto-save if enabled + save_status = "" + if auto_save: + character_id = char_data.get('id') + if character_id: + conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + add_character_chat(character_id, conversation_name, history) + save_status = "Chat auto-saved." + else: + save_status = "Character ID not found; chat not saved." 
+ + return history, save_status + + + # Define States for conversation_id and media_content, which are required for saving chat history + conversation_id = gr.State(str(uuid.uuid4())) + media_content = gr.State({}) + # TTS Generation and Playback + def speak_last_response(chat_history): + """Handle speaking the last chat response.""" + logging.debug("Starting speak_last_response") + try: + # If there's no chat history, return + if not chat_history or len(chat_history) == 0: + logging.debug("No messages in chat history") + return gr.update(value="No messages to speak", visible=True) + + # Log the chat history content for debugging + logging.debug(f"Chat history: {chat_history}") + + # Get the last message from the assistant + last_message = chat_history[-1][1] # Second element of the last tuple + logging.debug(f"Last message to speak: {last_message}") + + # Update status to generating + yield gr.update(value="Generating audio...", visible=True) + + # Generate audio using your preferred TTS provider + try: + audio_file = generate_audio( + text=last_message, + provider="openai", # or get from config + output_file="last_response.mp3", + api_key=None + ) + logging.debug(f"Generated audio file: {audio_file}") + except Exception as e: + logging.error(f"Failed to generate audio: {e}") + yield gr.update(value=f"Failed to generate audio: {str(e)}", visible=True) + return + + # Update status to playing + yield gr.update(value="Playing audio...", visible=True) + + # Play the audio + if audio_file and os.path.exists(audio_file): + try: + play_audio_file(audio_file) + yield gr.update(value="Finished playing audio", visible=True) + except Exception as e: + logging.error(f"Failed to play audio: {e}") + yield gr.update(value=f"Failed to play audio: {str(e)}", visible=True) + else: + logging.error("Audio file not found") + yield gr.update(value="Failed: Audio file not found", visible=True) - # Append the generated user response to history - history.append((user_response, None)) - - # Now generate the character's response to this user response - # Prepare the system message for the character - system_message_bot = f"""You are roleplaying as {char_name}. 
{char_data.get('system_prompt', '')}""" - - bot_message = chat( - f"{user_name_val}: {user_response}", - history[:-1], - media_content, - selected_parts, - api_endpoint, - api_key, - prompt=char_data.get('post_history_instructions', ''), - temperature=temperature, - system_message=system_message_bot + except Exception as e: + logging.error(f"Error in speak_last_response: {str(e)}") + yield gr.update(value=f"Error: {str(e)}", visible=True) + + # Button Callbacks + speak_button.click( + fn=speak_last_response, + inputs=[chat_history], # Use chat_history instead of chatbot + outputs=[tts_status], + api_name="speak_response" + ) + # Add the new button callbacks here + answer_for_me_button.click( + fn=answer_for_me, + inputs=[ + chat_history, + character_data, + api_name_input, + api_key_input, + temperature_slider, + user_name_input, + auto_save_checkbox, + streaming, + minp_slider, + maxp_slider + ], + outputs=[chat_history, save_status] + ).then( + lambda history: approximate_token_count(history), + inputs=[chat_history], + outputs=[token_count_display] ) - # Replace placeholders in bot message - bot_message = replace_placeholders(bot_message, char_name, user_name_val) - - # Update history with bot's response - history[-1] = (user_response, bot_message) - - # Auto-save if enabled - save_status = "" - if auto_save: - character_id = char_data.get('id') - if character_id: - conversation_name = f"Auto-saved chat {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - add_character_chat(character_id, conversation_name, history) - save_status = "Chat auto-saved." - else: - save_status = "Character ID not found; chat not saved." - - return history, save_status - - - # Define States for conversation_id and media_content, which are required for saving chat history - conversation_id = gr.State(str(uuid.uuid4())) - media_content = gr.State({}) - - # Button Callbacks - - # Add the new button callbacks here - answer_for_me_button.click( - fn=answer_for_me, - inputs=[ - chat_history, - character_data, - api_name_input, - api_key_input, - temperature_slider, - user_name_input, - auto_save_checkbox - ], - outputs=[chat_history, save_status] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) - - continue_talking_button.click( - fn=continue_talking, - inputs=[ - chat_history, - character_data, - api_name_input, - api_key_input, - temperature_slider, - user_name_input, - auto_save_checkbox - ], - outputs=[chat_history, save_status] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) - - import_card_button.click( - fn=import_character_card, - inputs=[character_card_upload], - outputs=[character_data, character_dropdown, save_status] - ) + continue_talking_button.click( + fn=continue_talking, + inputs=[ + chat_history, + character_data, + api_name_input, + api_key_input, + temperature_slider, + user_name_input, + auto_save_checkbox, + streaming, + minp_slider, + maxp_slider + ], + outputs=[chat_history, save_status] + ).then( + lambda history: approximate_token_count(history), + inputs=[chat_history], + outputs=[token_count_display] + ) - load_characters_button.click( - fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]), - outputs=character_dropdown - ) + import_card_button.click( + fn=import_character_card, + inputs=[character_card_upload], + outputs=[character_data, character_dropdown, save_status] + ) - # FIXME 
user_name_val = validate_user_name(user_name_val) - clear_chat_button.click( - fn=clear_chat_history, - inputs=[character_data, user_name_input], - outputs=[chat_history, character_data] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) + load_characters_button.click( + fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]), + outputs=character_dropdown + ) - character_dropdown.change( - fn=extract_character_id, - inputs=[character_dropdown], - outputs=character_data - ).then( - fn=load_character_wrapper, - inputs=[character_data, user_name_input], - outputs=[character_data, chat_history, character_image] - ) + # FIXME user_name_val = validate_user_name(user_name_val) + clear_chat_button.click( + fn=clear_chat_history, + inputs=[character_data, user_name_input], + outputs=[chat_history, character_data] + ).then( + lambda history: approximate_token_count(history), + inputs=[chat_history], + outputs=[token_count_display] + ) - send_message_button.click( - fn=character_chat_wrapper, - inputs=[ - user_input, - chat_history, - character_data, - api_name_input, - api_key_input, - temperature_slider, - user_name_input, - auto_save_checkbox - ], - outputs=[chat_history, save_status] - ).then( - lambda: "", outputs=user_input - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) + character_dropdown.change( + fn=extract_character_id, + inputs=[character_dropdown], + outputs=character_data + ).then( + fn=load_character_wrapper, + inputs=[character_data, user_name_input], + outputs=[character_data, chat_history, character_image] + ) - regenerate_button.click( - fn=regenerate_last_message, - inputs=[ - chat_history, - character_data, - api_name_input, - api_key_input, - temperature_slider, - user_name_input, - auto_save_checkbox - ], - outputs=[chat_history, save_status] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) + send_message_button.click( + fn=character_chat_wrapper, + inputs=[ + user_input, + chat_history, + character_data, + api_name_input, + api_key_input, + temperature_slider, + user_name_input, + auto_save_checkbox, + streaming, + minp_slider, + maxp_slider + ], + outputs=[chat_history, save_status], + ).then( + lambda: "", outputs=user_input # Clear the input box after sending + ).then( + lambda history: approximate_token_count(history), + inputs=[chat_history], + outputs=[token_count_display] + ) - import_chat_button.click( - fn=lambda: gr.update(visible=True), - outputs=chat_file_upload - ) + regenerate_button.click( + fn=regenerate_last_message, + inputs=[ + chat_history, + character_data, + api_name_input, + api_key_input, + temperature_slider, + user_name_input, + auto_save_checkbox, + streaming, + minp_slider, + maxp_slider + ], + outputs=[chat_history, save_status] + ).then( + lambda history: approximate_token_count(history) if history is not None else 0, + inputs=[chat_history], + outputs=[token_count_display] + ) - chat_file_upload.change( - fn=import_chat_history, - inputs=[chat_file_upload, chat_history, character_data, user_name_input], - outputs=[chat_history, character_data, save_status] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) + import_chat_button.click( + fn=lambda: gr.update(visible=True), + outputs=chat_file_upload + ) - 
save_chat_history_to_db.click( - fn=save_chat_history_to_db_wrapper, - inputs=[ - chat_history, - conversation_id, - media_content, - chat_media_name, - character_data, - auto_save_checkbox # Pass the auto_save state - ], - outputs=[conversation_id, save_status] - ) + chat_file_upload.change( + fn=import_chat_history, + inputs=[chat_file_upload, chat_history, character_data, user_name_input], + outputs=[chat_history, character_data, save_status] + ).then( + lambda history: approximate_token_count(history), + inputs=[chat_history], + outputs=[token_count_display] + ) - # Populate the update_chat_dropdown based on selected character - character_dropdown.change( - fn=select_chat_for_update, - inputs=[], - outputs=[update_chat_dropdown, save_status] - ) + save_chat_history_to_db.click( + fn=save_chat_history_to_db_wrapper, + inputs=[ + chat_history, + conversation_id, + media_content, + chat_media_name, + character_data, + auto_save_checkbox # Pass the auto_save state + ], + outputs=[conversation_id, save_status] + ) - load_selected_chat_button.click( - fn=load_selected_chat, - inputs=[update_chat_dropdown], - outputs=[chat_history, save_status] - ) + # Populate the update_chat_dropdown based on selected character + character_dropdown.change( + fn=select_chat_for_update, + inputs=[], + outputs=[update_chat_dropdown, save_status] + ) - save_snapshot_button.click( - fn=save_untracked_chat_action, - inputs=[chat_history, character_data], - outputs=save_status - ) + load_selected_chat_button.click( + fn=load_selected_chat, + inputs=[update_chat_dropdown], + outputs=[chat_history, save_status] + ) - update_chat_button.click( - fn=update_chat, - inputs=[selected_chat_id, chat_history], - outputs=save_status - ) + save_snapshot_button.click( + fn=save_untracked_chat_action, + inputs=[chat_history, character_data], + outputs=save_status + ) - # Search Chats - chat_search_button.click( - fn=search_existing_chats, - inputs=[chat_search_query], - outputs=[chat_search_dropdown, save_status] - ).then( - fn=lambda choices, msg: gr.update(choices=choices, visible=True) if choices else gr.update(visible=False), - inputs=[chat_search_dropdown, save_status], - outputs=[chat_search_dropdown] - ) + update_chat_button.click( + fn=update_chat, + inputs=[selected_chat_id, chat_history], + outputs=save_status + ) - # Load Selected Chat from Search - load_chat_button.click( - fn=load_selected_chat_from_search, - inputs=[chat_search_dropdown, user_name_input], - outputs=[character_data, chat_history, character_image, save_status] - ).then( - lambda history: approximate_token_count(history), - inputs=[chat_history], - outputs=[token_count_display] - ) + # Search Chats + chat_search_button.click( + fn=search_existing_chats, + inputs=[chat_search_query], + outputs=[chat_search_dropdown, save_status] + ).then( + fn=lambda choices, msg: gr.update(choices=choices, visible=True) if choices else gr.update(visible=False), + inputs=[chat_search_dropdown, save_status], + outputs=[chat_search_dropdown] + ) - # Show Load Chat Button when a chat is selected - chat_search_dropdown.change( - fn=lambda selected: gr.update(visible=True) if selected else gr.update(visible=False), - inputs=[chat_search_dropdown], - outputs=[load_chat_button] - ) + # Load Selected Chat from Search + load_chat_button.click( + fn=load_selected_chat_from_search, + inputs=[chat_search_dropdown, user_name_input], + outputs=[character_data, chat_history, character_image, save_status] + ).then( + lambda history: approximate_token_count(history), + 
inputs=[chat_history], + outputs=[token_count_display] + ) + # Show Load Chat Button when a chat is selected + chat_search_dropdown.change( + fn=lambda selected: gr.update(visible=True) if selected else gr.update(visible=False), + inputs=[chat_search_dropdown], + outputs=[load_chat_button] + ) - return character_data, chat_history, user_input, user_name, character_image + return character_data, chat_history, user_input, user_name, character_image def create_character_chat_mgmt_tab(): @@ -1589,7 +1777,11 @@ def export_all_character_conversations(character_selection): load_characters_button.click( fn=lambda: gr.update(choices=[f"{char['name']} (ID: {char['id']})" for char in get_character_cards()]), outputs=select_character - ) + ).then( + lambda choices: print(f"Dropdown choices: {choices}"), # Debugging line + inputs=[select_character], + outputs=None + ) export_chat_button.click( fn=export_current_conversation, diff --git a/App_Function_Libraries/Gradio_UI/Chat_ui.py b/App_Function_Libraries/Gradio_UI/Chat_ui.py index 6df00168d..f728ab344 100644 --- a/App_Function_Libraries/Gradio_UI/Chat_ui.py +++ b/App_Function_Libraries/Gradio_UI/Chat_ui.py @@ -20,7 +20,11 @@ from App_Function_Libraries.DB.RAG_QA_Chat_DB import get_db_connection from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_user_prompt from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram -from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints +from App_Function_Libraries.TTS.TTS_Providers import generate_audio, play_mp3 +from App_Function_Libraries.Utils.Utils import default_api_endpoint, format_api_name, global_api_endpoints, \ + loaded_config_data + + # # ######################################################################################################################## @@ -42,8 +46,8 @@ def show_delete_message(selected): def debug_output(media_content, selected_parts): - print(f"Debug - Media Content: {media_content}") - print(f"Debug - Selected Parts: {selected_parts}") + logging.debug(f"Debug - Media Content: {media_content}") + logging.debug(f"Debug - Selected Parts: {selected_parts}") return "" @@ -55,7 +59,7 @@ def update_selected_parts(use_content, use_summary, use_prompt): selected_parts.append("summary") if use_prompt: selected_parts.append("prompt") - print(f"Debug - Update Selected Parts: {selected_parts}") + logging.debug(f"Debug - Update Selected Parts: {selected_parts}") return selected_parts @@ -85,7 +89,7 @@ def clear_chat_single(): # FIXME - add additional features.... 
def chat_wrapper(message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, conversation_id, - save_conversation, temperature, system_prompt, max_tokens=None, top_p=None, frequency_penalty=None, + save_conversation, temperature, system_prompt, streaming=False, max_tokens=None, top_p=None, frequency_penalty=None, presence_penalty=None, stop_sequence=None): try: if save_conversation: @@ -107,22 +111,21 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint, full_message = message # Generate bot response - bot_message = chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, - temperature, system_prompt) - - logging.debug(f"Bot message being returned: {bot_message}") + bot_message = "" + for chunk in chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, + temperature, system_prompt, streaming): + bot_message += chunk # Accumulate the streamed response + logging.debug(f"Bot message being returned: {bot_message}") + # Yield the incremental response and updated history + yield bot_message, history + [(message, bot_message)], conversation_id if save_conversation: # Add assistant message to the database save_message(conversation_id, role="assistant", content=bot_message) - # Update history - new_history = history + [(message, bot_message)] - - return bot_message, new_history, conversation_id except Exception as e: logging.error(f"Error in chat wrapper: {str(e)}") - return "An error occurred.", history, conversation_id + yield "An error occurred.", history, conversation_id def search_conversations(query): @@ -174,38 +177,46 @@ def load_conversation(conversation_id): def regenerate_last_message(history, media_content, selected_parts, api_endpoint, api_key, custom_prompt, temperature, - system_prompt): + system_prompt, streaming=False): if not history: return history, "No messages to regenerate." - last_entry = history[-1] - last_user_message, last_bot_message = last_entry - - if last_bot_message is None: - return history, "The last message is not from the bot." + logging.debug("Starting regenerate_last_message") - new_history = history[:-1] + # Find the last user message and its corresponding bot response + last_user_message = None + last_bot_message = None + for i in range(len(history) - 1, -1, -1): + if history[i][0]: # This is a user message + last_user_message = history[i][0] + if i + 1 < len(history): + last_bot_message = history[i + 1][1] + break if not last_user_message: - return new_history, "No user message to regenerate the bot response." - - full_message = last_user_message - - bot_message = chat( - full_message, - new_history, - media_content, - selected_parts, - api_endpoint, - api_key, - custom_prompt, - temperature, - system_prompt - ) + return history, "No user message found to regenerate the bot response." - new_history.append((last_user_message, bot_message)) + # Remove the last bot message from history + new_history = history[:-1] if last_bot_message else history - return new_history, "Last message regenerated successfully." 
+ # Generate the new bot response + bot_message = "" + for chunk in chat(last_user_message, new_history, media_content, selected_parts, api_endpoint, api_key, + custom_prompt, temperature, system_prompt, streaming): + if isinstance(chunk, str): + bot_message += chunk + elif isinstance(chunk, dict) and "choices" in chunk: + content = chunk["choices"][0].get("delta", {}).get("content", "") + bot_message += content + + # Update the chatbot interface with the partial response + new_history_with_regenerated = new_history + [(last_user_message, bot_message)] + yield new_history_with_regenerated, "Regenerating..." + + # Update the history with the final regenerated message + new_history_with_regenerated = new_history + [(last_user_message, bot_message)] + logging.debug("Finished regenerating message") + yield new_history_with_regenerated, "Last message regenerated successfully." def update_dropdown_multiple(query, search_type, keywords=""): @@ -261,6 +272,24 @@ def create_chat_interface(): .chatbot-container .message-wrap .message { font-size: 14px !important; } + #tts-status { + font-weight: bold; + padding: 5px; + border-radius: 4px; + margin-top: 5px; + } + #tts-status[value*="Error"], #tts-status[value*="Failed"] { + color: red; + background-color: #ffe6e6; + } + #tts-status[value*="Generating"], #tts-status[value*="Playing"] { + color: #0066cc; + background-color: #e6f2ff; + } + #tts-status[value*="Finished"] { + color: green; + background-color: #e6ffe6; + } """ with gr.TabItem("Remote LLM Chat (Horizontal)", visible=True): gr.Markdown("# Chat with a designated LLM Endpoint, using your selected item as starting context") @@ -342,16 +371,23 @@ def create_chat_interface(): with gr.Column(scale=2): chatbot = gr.Chatbot(height=800, elem_classes="chatbot-container") msg = gr.Textbox(label="Enter your message") + streaming = gr.Checkbox(label="Streaming", value=False, visible=True) submit = gr.Button("Submit") + with gr.Row(): + speak_button = gr.Button("Speak Response") + tts_status = gr.Textbox(label="TTS Status", interactive=False) regenerate_button = gr.Button("Regenerate Last Message") - token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) - clear_chat_button = gr.Button("Clear Chat") + with gr.Row(): + token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) + clear_chat_button = gr.Button("Clear Chat") chat_media_name = gr.Textbox(label="Custom Chat Name(optional)") - save_chat_history_to_db = gr.Button("Save Chat History to DataBase") - save_status = gr.Textbox(label="Save Status", interactive=False) - save_chat_history_as_file = gr.Button("Save Chat History as File") - download_file = gr.File(label="Download Chat History") + with gr.Row(): + save_chat_history_to_db = gr.Button("Save Chat History to DataBase") + save_status = gr.Textbox(label="Save Status", interactive=False) + with gr.Row(): + save_chat_history_as_file = gr.Button("Save Chat History as File") + download_file = gr.File(label="Download Chat History") # Restore original functionality search_button.click( @@ -417,6 +453,64 @@ def on_preset_prompt_checkbox_change(is_checked): outputs=[preset_prompt, prev_page_button, next_page_button, page_display, current_page_state, total_pages_state] ) + # TTS Generation and Playback + def speak_last_response(chatbot): + """Handle speaking the last chat response.""" + logging.debug("Starting speak_last_response") + try: + # If there's no chat history, return + if not chatbot or len(chatbot) == 0: + 
logging.debug("No messages in chatbot history") + return gr.update(value="No messages to speak", visible=True) + + # Log the chatbot content for debugging + logging.debug(f"Chatbot history: {chatbot}") + + # Get the last message from the assistant + last_message = chatbot[-1][1] + logging.debug(f"Last message to speak: {last_message}") + + # Update status to generating + yield gr.update(value="Generating audio...", visible=True) + + # Generate audio using your preferred TTS provider + try: + audio_file = generate_audio( + api_key=None, # Use default API key + text=last_message, + provider="openai", # or get from config + output_file="last_response.mp3" + ) + logging.debug(f"Generated audio file: {audio_file}") + except Exception as e: + logging.error(f"Failed to generate audio: {e}") + yield gr.update(value=f"Failed to generate audio: {str(e)}", visible=True) + return + + # Update status to playing + yield gr.update(value="Playing audio...", visible=True) + + # Play the audio + if audio_file and os.path.exists(audio_file): + try: + play_mp3(audio_file) + yield gr.update(value="Finished playing audio", visible=True) + except Exception as e: + logging.error(f"Failed to play audio: {e}") + yield gr.update(value=f"Failed to play audio: {str(e)}", visible=True) + else: + logging.error("Audio file not found") + yield gr.update(value="Failed: Audio file not found", visible=True) + + except Exception as e: + logging.error(f"Error in speak_last_response: {str(e)}") + yield gr.update(value=f"Error: {str(e)}", visible=True) + speak_button.click( + fn=speak_last_response, + inputs=[chatbot], + outputs=[tts_status], + api_name="speak_response" + ) def on_prev_page_click(current_page, total_pages): new_page = max(current_page - 1, 1) prompts, total_pages, current_page = list_prompts(page=new_page, per_page=20) @@ -456,7 +550,7 @@ def on_next_page_click(current_page, total_pages): submit.click( chat_wrapper, inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id, - save_conversation, temperature, system_prompt_input], + save_conversation, temperature, system_prompt_input, streaming], outputs=[msg, chatbot, conversation_id] ).then( # Clear the message box after submission lambda x: gr.update(value=""), @@ -521,8 +615,8 @@ def on_next_page_click(current_page, total_pages): regenerate_button.click( regenerate_last_message, inputs=[chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, temperature, - system_prompt_input], - outputs=[chatbot, save_status] + system_prompt_input, streaming], + outputs=[chatbot, gr.Textbox(label="Regenerate Status")] ).then( lambda history: approximate_token_count(history), inputs=[chatbot], @@ -577,7 +671,7 @@ def create_chat_interface_stacked(): use_summary = gr.Checkbox(label="Use Summary") use_prompt = gr.Checkbox(label="Use Prompt") save_conversation = gr.Checkbox(label="Save Conversation", value=False, visible=True) - temp = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7) + temp = gr.Slider(label="Temperature", minimum=0.00, maximum=2.0, step=0.05, value=0.7) with gr.Row(): conversation_search = gr.Textbox(label="Search Conversations") with gr.Row(): @@ -608,6 +702,10 @@ def create_chat_interface_stacked(): value=False, visible=True ) + streaming = gr.Checkbox(label="Streaming", + value=False, + visible=True + ) with gr.Row(): preset_prompt = gr.Dropdown( @@ -640,15 +738,21 @@ def create_chat_interface_stacked(): with gr.Row(): with gr.Column(): submit = 
gr.Button("Submit") + with gr.Row(): + speak_button = gr.Button("Speak Response") + tts_status = gr.Textbox(label="TTS Status", interactive=False) regenerate_button = gr.Button("Regenerate Last Message") - token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) - clear_chat_button = gr.Button("Clear Chat") - chat_media_name = gr.Textbox(label="Custom Chat Name(optional)", visible=True) - save_chat_history_to_db = gr.Button("Save Chat History to DataBase") - save_status = gr.Textbox(label="Save Status", interactive=False) - save_chat_history_as_file = gr.Button("Save Chat History as File") - with gr.Column(): - download_file = gr.File(label="Download Chat History") + with gr.Row(): + token_count_display = gr.Number(label="Approximate Token Count", value=0, interactive=False) + clear_chat_button = gr.Button("Clear Chat") + + chat_media_name = gr.Textbox(label="Custom Chat Name(optional)") + with gr.Row(): + save_chat_history_to_db = gr.Button("Save Chat History to DataBase") + save_status = gr.Textbox(label="Save Status", interactive=False) + with gr.Row(): + save_chat_history_as_file = gr.Button("Save Chat History as File") + download_file = gr.File(label="Download Chat History") # Restore original functionality search_button.click( @@ -826,7 +930,64 @@ def on_next_page_click(current_page, total_pages): inputs=[current_page_state, total_pages_state], outputs=[preset_prompt, page_display, current_page_state] ) + # TTS Generation and Playback + def speak_last_response(chatbot): + """Handle speaking the last chat response.""" + logging.debug("Starting speak_last_response") + try: + # If there's no chat history, return + if not chatbot or len(chatbot) == 0: + logging.debug("No messages in chatbot history") + return gr.update(value="No messages to speak", visible=True) + + # Log the chatbot content for debugging + logging.debug(f"Chatbot history: {chatbot}") + + # Get the last message from the assistant + last_message = chatbot[-1][1] + logging.debug(f"Last message to speak: {last_message}") + + # Update status to generating + yield gr.update(value="Generating audio...", visible=True) + + # Generate audio using your preferred TTS provider + try: + audio_file = generate_audio( + api_key=None, # Use default API key + text=last_message, + provider="openai", # or get from config + output_file="last_response.mp3" + ) + logging.debug(f"Generated audio file: {audio_file}") + except Exception as e: + logging.error(f"Failed to generate audio: {e}") + yield gr.update(value=f"Failed to generate audio: {str(e)}", visible=True) + return + + # Update status to playing + yield gr.update(value="Playing audio...", visible=True) + + # Play the audio + if audio_file and os.path.exists(audio_file): + try: + play_mp3(audio_file) + yield gr.update(value="Finished playing audio", visible=True) + except Exception as e: + logging.error(f"Failed to play audio: {e}") + yield gr.update(value=f"Failed to play audio: {str(e)}", visible=True) + else: + logging.error("Audio file not found") + yield gr.update(value="Failed: Audio file not found", visible=True) + except Exception as e: + logging.error(f"Error in speak_last_response: {str(e)}") + yield gr.update(value=f"Error: {str(e)}", visible=True) + speak_button.click( + fn=speak_last_response, + inputs=[chatbot], + outputs=[tts_status], + api_name="speak_response" + ) # Update prompts when a preset is selected preset_prompt.change( update_prompts, @@ -837,7 +998,7 @@ def on_next_page_click(current_page, total_pages): submit.click( 
chat_wrapper, inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, - conversation_id, save_conversation, temp, system_prompt], + conversation_id, save_conversation, temp, system_prompt, streaming], outputs=[msg, chatbot, conversation_id] ).then( lambda x: gr.update(value=""), diff --git a/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py b/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py index 0c88aaeb3..4ad2ac0e1 100644 --- a/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py +++ b/App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py @@ -28,6 +28,7 @@ from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat +from App_Function_Libraries.TTS.TTS_Providers import play_mp3, generate_audio from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name, \ load_comprehensive_config @@ -199,6 +200,9 @@ def update_conversation_list(): with gr.Column(scale=2): chatbot = gr.Chatbot(height=700) msg = gr.Textbox(label="Enter your message") + with gr.Row(): + speak_button = gr.Button("Speak Response") + tts_status = gr.Textbox(label="TTS Status", interactive=False) submit = gr.Button("Submit") clear_chat = gr.Button("Clear Chat History") @@ -789,6 +793,65 @@ def clear_chat_history(): outputs=[chatbot, msg, rating_display, rating_input] ) + # TTS Generation and Playback + def speak_last_response(chatbot): + """Handle speaking the last chat response.""" + logging.debug("Starting speak_last_response") + try: + # If there's no chat history, return + if not chatbot or len(chatbot) == 0: + logging.debug("No messages in chatbot history") + return gr.update(value="No messages to speak", visible=True) + + # Log the chatbot content for debugging + logging.debug(f"Chatbot history: {chatbot}") + + # Get the last message from the assistant + last_message = chatbot[-1][1] + logging.debug(f"Last message to speak: {last_message}") + + # Update status to generating + yield gr.update(value="Generating audio...", visible=True) + + # Generate audio using your preferred TTS provider + try: + audio_file = generate_audio( + api_key=None, # Use default API key + text=last_message, + provider="openai", # or get from config + output_file="last_response.mp3" + ) + logging.debug(f"Generated audio file: {audio_file}") + except Exception as e: + logging.error(f"Failed to generate audio: {e}") + yield gr.update(value=f"Failed to generate audio: {str(e)}", visible=True) + return + + # Update status to playing + yield gr.update(value="Playing audio...", visible=True) + + # Play the audio + if audio_file and os.path.exists(audio_file): + try: + play_mp3(audio_file) + yield gr.update(value="Finished playing audio", visible=True) + except Exception as e: + logging.error(f"Failed to play audio: {e}") + yield gr.update(value=f"Failed to play audio: {str(e)}", visible=True) + else: + logging.error("Audio file not found") + yield gr.update(value="Failed: Audio file not found", visible=True) + + except Exception as e: + logging.error(f"Error in speak_last_response: {str(e)}") + yield gr.update(value=f"Error: {str(e)}", visible=True) + speak_button.click( + fn=speak_last_response, + inputs=[chatbot], + outputs=[tts_status], + api_name="speak_response" + ) + return ( context_source, existing_file, diff --git 
a/App_Function_Libraries/Gradio_UI/Video_transcription_tab.py b/App_Function_Libraries/Gradio_UI/Video_transcription_tab.py index 51963c6e4..520688380 100644 --- a/App_Function_Libraries/Gradio_UI/Video_transcription_tab.py +++ b/App_Function_Libraries/Gradio_UI/Video_transcription_tab.py @@ -956,6 +956,8 @@ def process_url_with_metadata(input_item, num_speakers, whisper_model, custom_pr def toggle_confabulation_output(checkbox_value): return gr.update(visible=checkbox_value) + + confab_checkbox.change( fn=toggle_confabulation_output, inputs=[confab_checkbox], diff --git a/App_Function_Libraries/Gradio_UI/WebSearch_tab.py b/App_Function_Libraries/Gradio_UI/WebSearch_tab.py new file mode 100644 index 000000000..3ad08922e --- /dev/null +++ b/App_Function_Libraries/Gradio_UI/WebSearch_tab.py @@ -0,0 +1,341 @@ +# WebSearch_tab.py +# Gradio UI for performing web searches with aggregated results +# +# Imports +import asyncio +import logging +from typing import Dict + +# +# External Imports +import gradio as gr + +from App_Function_Libraries.Utils.Utils import loaded_config_data +# +# Local Imports +from App_Function_Libraries.Web_Scraping.WebSearch_APIs import generate_and_search, analyze_and_aggregate +# +######################################################################################################################## +# +# Functions: +def create_websearch_tab(): + with gr.TabItem("Web Search & Review"): + with gr.Blocks() as interface: + search_state = gr.State(value=None) + # Basic styling + gr.HTML(""" + + """) + + # Input Section + with gr.Row(): + with gr.Column(): + query = gr.Textbox( + label="Search Query", + placeholder="What would you like to search for?", + lines=2 + ) + + with gr.Row(): + engine = gr.Dropdown( + choices=["google", "bing", "duckduckgo", "brave"], + value="google", + label="Search Engine" + ) + num_results = gr.Slider( + minimum=1, maximum=20, value=10, step=1, + label="Number of Results" + ) + + with gr.Row(): + country = gr.Dropdown( + choices=["US", "UK", "CA", "AU"], + value="US", + label="Content Region" + ) + language = gr.Dropdown( + choices=["en", "es", "fr", "de"], + value="en", + label="Language" + ) + + # Action Buttons and Status + with gr.Row(): + search_btn = gr.Button("Search", variant="primary") + status = gr.Markdown("Ready") + + # Results Section + results_display = gr.HTML(visible=False) + # Analysis button and status container + with gr.Row() as analyze_container: + analyze_btn = gr.Button("Analyze Selected Results", visible=False) + analysis_status = gr.HTML( + """ +
+ """, + visible=False + ) + + # Final Output Section + with gr.Column(visible=False) as output_section: + # Single markdown box for all analysis text + answer = gr.Markdown( + label="Analysis Results", + elem_classes="analysis-text" + ) + # Sources box + sources = gr.JSON(label="Sources") + + def format_results(results: list) -> str: + """Format search results as HTML.""" + html = "" + for idx, result in enumerate(results): + html += f""" +
+ {result.get('title', 'No title')}
+ {result.get('url', 'No URL')}
+ {result.get('content', 'No content')[:200]}...
+ """ + return html + + relevance_analysis_llm = loaded_config_data['search_settings']["relevance_analysis_llm"] + final_answer_llm = loaded_config_data['search_settings']["final_answer_llm"] + + def perform_search(query: str, engine: str, num_results: int, + country: str, language: str) -> Dict: + """Execute the search operation.""" + search_params = { + "engine": engine, + "content_country": country, + "search_lang": language, + "output_lang": language, + "result_count": num_results, + # Add LLM settings + "relevance_analysis_llm": relevance_analysis_llm, + "final_answer_llm": final_answer_llm + } + + return generate_and_search(query, search_params) + + def search_handler(query, engine, num_results, country, language): + try: + # Call perform_search with individual arguments + results = perform_search( + query=query, + engine=engine, + num_results=num_results, + country=country, + language=language + ) + + logging.debug(f"Search results: {results}") + + if not results.get("web_search_results_dict") or not results["web_search_results_dict"].get( + "results"): + raise ValueError("No search results returned") + + results_html = format_results(results["web_search_results_dict"]["results"]) + + # Store complete results including search params + state_to_store = { + "web_search_results_dict": results["web_search_results_dict"], + "sub_query_dict": results.get("sub_query_dict", {}), + "search_params": { + "engine": engine, + "content_country": country, + "search_lang": language, + "output_lang": language, + "relevance_analysis_llm": relevance_analysis_llm, + "final_answer_llm": final_answer_llm + } + } + + logging.info( + f"Storing state with {len(state_to_store['web_search_results_dict']['results'])} results") + + return ( + gr.Markdown("Search completed successfully"), + gr.HTML(results_html, visible=True), + gr.Button(visible=True), + gr.HTML(visible=True), + gr.Column(visible=False), + state_to_store + ) + except Exception as e: + logging.error(f"Search error: {str(e)}", exc_info=True) + return ( + gr.Markdown(f"Error: {str(e)}"), + gr.HTML(visible=False), + gr.Button(visible=False), + gr.HTML(visible=False), + gr.Column(visible=False), + None + ) + + async def analyze_handler(state): + logging.debug(f"Received state for analysis: {state}") + try: + yield ( + gr.HTML( + """ +
+ Processing results...
+ """, + visible=True + ), + gr.Markdown("Analysis in progress..."), + gr.JSON(None), + gr.Column(visible=False) + ) + + if not state or not isinstance(state, dict): + raise ValueError(f"Invalid state received: {state}") + + if not state.get("web_search_results_dict"): + raise ValueError("No web search results in state") + + if not state["web_search_results_dict"].get("results"): + raise ValueError("No results array in web search results") + + relevance_analysis_llm = loaded_config_data['search_settings']["relevance_analysis_llm"] + final_answer_llm = loaded_config_data['search_settings']["final_answer_llm"] + + # Create search params with required LLM settings + search_params = { + "engine": state["web_search_results_dict"]["search_engine"], + "content_country": state["web_search_results_dict"]["content_country"], + "search_lang": state["web_search_results_dict"]["search_lang"], + "output_lang": state["web_search_results_dict"]["output_lang"], + # Add LLM settings + "relevance_analysis_llm": relevance_analysis_llm, + "final_answer_llm": final_answer_llm + } + + # Analyze results + analysis = await analyze_and_aggregate( + state["web_search_results_dict"], + state.get("sub_query_dict", {}), + state.get("search_params", {}) + ) + + logging.debug(f"Analysis results: {analysis}") + + if not analysis.get("final_answer"): + raise ValueError("Analysis did not produce a final answer") + + # Format the raw report with proper markdown + raw_report = analysis["final_answer"]["Report"] + + # Ensure proper markdown formatting + formatted_answer = raw_report.replace('\n', '\n\n') # Double line breaks + formatted_answer = formatted_answer.replace('•', '\n•') # Bullet points on new lines + formatted_answer = formatted_answer.replace('- ', '\n- ') # Dashed lists on new lines + + # Handle numbered lists (assumes numbers followed by period or parenthesis) + import re + formatted_answer = re.sub(r'(\d+[\)\.]) ', r'\n\1 ', formatted_answer) + + # Clean up any triple+ line breaks + formatted_answer = re.sub(r'\n{3,}', '\n\n', formatted_answer) + + yield ( + gr.HTML( + """ +
+ ✓ Analysis complete +
+ """, + visible=True + ), + gr.Markdown(formatted_answer), + analysis["final_answer"]["evidence"], + gr.Column(visible=True) + ) + except Exception as e: + logging.error(f"Analysis error: {str(e)}", exc_info=True) + yield ( + gr.HTML( + f""" +
+ ❌ Error: {str(e)}
+ """, + visible=True + ), + gr.Markdown("Analysis failed"), + gr.JSON(None), + gr.Column(visible=False) + ) + + # Connect event handlers + search_btn.click( + fn=search_handler, + inputs=[ + query, + engine, + num_results, + country, + language, + ], + outputs=[ + status, + results_display, + analyze_btn, + analysis_status, + output_section, + search_state # Update state + ] + ) + + analyze_btn.click( + fn=analyze_handler, + inputs=[search_state], # Use the state + outputs=[ + analysis_status, + answer, + sources, + output_section + ] + ) + + return interface + +# +# End of File +######################################################################################################################## diff --git a/App_Function_Libraries/Gradio_UI/Website_scraping_tab.py b/App_Function_Libraries/Gradio_UI/Website_scraping_tab.py index 5204547e0..470b70a1b 100644 --- a/App_Function_Libraries/Gradio_UI/Website_scraping_tab.py +++ b/App_Function_Libraries/Gradio_UI/Website_scraping_tab.py @@ -10,14 +10,11 @@ from concurrent.futures import ThreadPoolExecutor from typing import Optional, List, Dict, Any from urllib.parse import urlparse, urljoin - # # External Imports import gradio as gr from playwright.async_api import TimeoutError, async_playwright from playwright.sync_api import sync_playwright - -from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name # # Local Imports from App_Function_Libraries.Web_Scraping.Article_Extractor_Lib import scrape_from_sitemap, scrape_by_url_level, \ @@ -25,8 +22,7 @@ from App_Function_Libraries.DB.DB_Manager import list_prompts from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize - - +from App_Function_Libraries.Utils.Utils import default_api_endpoint, global_api_endpoints, format_api_name # ######################################################################################################################## # @@ -408,7 +404,12 @@ def create_website_scraping_tab(): with gr.Column(): progress_output = gr.Textbox(label="Progress", lines=3) - result_output = gr.Textbox(label="Result", lines=20) + result_output = gr.Textbox( + label="Web Scraping Results", + lines=20, + elem_classes="scrollable-textbox", + show_copy_button=True + ) def update_ui_for_scrape_method(method): url_level_update = gr.update(visible=(method == "URL Level")) diff --git a/App_Function_Libraries/Gradio_UI/Workflows_tab.py b/App_Function_Libraries/Gradio_UI/Workflows_tab.py index 5c911d290..05530af21 100644 --- a/App_Function_Libraries/Gradio_UI/Workflows_tab.py +++ b/App_Function_Libraries/Gradio_UI/Workflows_tab.py @@ -20,7 +20,7 @@ # Functions: # Load workflows from a JSON file -json_path = Path('./Helper_Scripts/Workflows/Workflows.json') +json_path = Path('./App_Function_Libraries/Workflows/Workflows.json') with json_path.open('r') as f: workflows = json.load(f) diff --git a/App_Function_Libraries/LLM_API_Calls.py b/App_Function_Libraries/LLM_API_Calls.py index 148c1e476..1c8a7e3b8 100644 --- a/App_Function_Libraries/LLM_API_Calls.py +++ b/App_Function_Libraries/LLM_API_Calls.py @@ -71,7 +71,7 @@ def get_openai_embeddings(input_data: str, model: str) -> List[float]: List[float]: The embeddings generated by the API. 
""" loaded_config_data = load_and_log_configs() - api_key = loaded_config_data['api_keys']['openai'] + api_key = loaded_config_data['openai_api']['api_key'] if not api_key: logging.error("OpenAI: API key not found or is empty") @@ -116,7 +116,7 @@ def get_openai_embeddings(input_data: str, model: str) -> List[float]: raise ValueError(f"OpenAI: Unexpected error occurred: {str(e)}") -def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None, streaming=False): +def chat_with_openai(api_key, input_data, custom_prompt_arg, temp, system_message, streaming, minp, maxp, model): loaded_config_data = load_and_log_configs() openai_api_key = api_key try: @@ -124,14 +124,33 @@ def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_m if not openai_api_key: logging.info("OpenAI: API key not provided as parameter") logging.info("OpenAI: Attempting to use API key from config file") - openai_api_key = loaded_config_data['api_keys']['openai'] + openai_api_key = loaded_config_data['openai_api']['api_key'] - if not openai_api_key: + if not openai_api_key or openai_api_key == "": logging.error("OpenAI: API key not found or is empty") return "OpenAI: API Key Not Provided/Found in Config file or is empty" logging.debug(f"OpenAI: Using API Key: {openai_api_key[:5]}...{openai_api_key[-5:]}") + if isinstance(streaming, str): + streaming = streaming.lower() == "true" + elif isinstance(streaming, int): + streaming = bool(streaming) # Convert integers (1/0) to boolean + elif streaming is None: + streaming = loaded_config_data.get('openai_api', {}).get('streaming', False) + logging.debug("OpenAI: Streaming mode enabled") + else: + logging.debug("OpenAI: Streaming mode disabled") + if not isinstance(streaming, bool): + raise ValueError(f"Invalid type for 'streaming': Expected a boolean, got {type(streaming).__name__}") + + if maxp is None: + maxp = loaded_config_data['openai_api']['top_p'] + maxp = float(maxp) + if model is None: + openai_model = loaded_config_data['openai_api']['model'] or "gpt-4o" + logging.debug(f"OpenAI: Using model: {openai_model}") + # Input data handling logging.debug(f"OpenAI: Raw input data type: {type(input_data)}") logging.debug(f"OpenAI: Raw input data (first 500 chars): {str(input_data)[:500]}...") @@ -177,9 +196,6 @@ def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_m logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...") logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}") - openai_model = loaded_config_data['models']['openai'] or "gpt-4o" - logging.debug(f"OpenAI: Using model: {openai_model}") - headers = { 'Authorization': f'Bearer {openai_api_key}', 'Content-Type': 'application/json' @@ -200,9 +216,10 @@ def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_m {"role": "system", "content": system_message}, {"role": "user", "content": openai_prompt} ], - "max_tokens": 4096, + "max_completion_tokens": 4096, "temperature": temp, - "streaming": streaming + "stream": streaming, + "top_p": maxp } if streaming: logging.debug("OpenAI: Posting request (streaming") @@ -212,6 +229,7 @@ def chat_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_m json=data, stream=True ) + logging.debug(f"OpenAI: Response text: {response.text}") response.raise_for_status() def stream_generator(): @@ -266,7 +284,7 @@ def stream_generator(): return f"OpenAI: Unexpected error occurred: {str(e)}" -def chat_with_anthropic(api_key, input_data, model, 
custom_prompt_arg, max_retries=3, retry_delay=5, system_prompt=None, temp=None, streaming=False): +def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retries=3, retry_delay=5, system_prompt=None, temp=None, streaming=False, minp=None, maxp=None): try: loaded_config_data = load_and_log_configs() @@ -284,7 +302,7 @@ def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retri logging.info("Anthropic: Attempting to use API key from config file") # Ensure 'api_keys' and 'anthropic' keys exist try: - anthropic_api_key = loaded_config_data['api_keys']['anthropic'] + anthropic_api_key = loaded_config_data['anthropic_api']['api_key'] logging.debug(f"Anthropic: Loaded API Key from config: {anthropic_api_key[:5]}...{anthropic_api_key[-5:]}") except (KeyError, TypeError) as e: logging.error(f"Anthropic: Error accessing API key from config: {str(e)}") @@ -310,9 +328,9 @@ def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retri logging.debug(f"AnthropicAI: Type of data: {type(input_data)}") # Retrieve the model from config if not provided - if not model: + if model is None: try: - anthropic_model = loaded_config_data['models']['anthropic'] + anthropic_model = loaded_config_data['anthropic_api']['model'] logging.debug(f"Anthropic: Loaded model from config: {anthropic_model}") except (KeyError, TypeError) as e: logging.error(f"Anthropic: Error accessing model from config: {str(e)}") @@ -322,9 +340,29 @@ def chat_with_anthropic(api_key, input_data, model, custom_prompt_arg, max_retri logging.debug(f"Anthropic: Using provided model: {anthropic_model}") if temp is None: - temp = 1.0 + temp = loaded_config_data['anthropic_api']['temperature'] + temp = float(temp) + logging.debug(f"Anthropic: Using temperature from config.txt: {temp}") + else: + temp = 0.7 logging.debug(f"Anthropic: Using default temperature: {temp}") + if minp is None: + minp = loaded_config_data['anthropic_api']['min_p'] + minp = float(minp) + logging.debug(f"Anthropic: Using min_p from config.txt: {minp}") + else: + minp = 0.0 + logging.debug(f"Anthropic: Using default min_p: {minp}") + + if maxp is None: + maxp = loaded_config_data['anthropic_api']['top_p'] + maxp = float(maxp) + logging.debug(f"Anthropic: Using max_p from config.txt: {maxp}") + else: + maxp = 1.0 + logging.debug(f"Anthropic: Using default maxp: {maxp}") + headers = { 'x-api-key': anthropic_api_key, 'anthropic-version': '2023-06-01', @@ -454,8 +492,8 @@ def chat_with_cohere(api_key, input_data, model=None, custom_prompt_arg=None, sy else: logging.info("Cohere Chat: API key not provided as parameter") logging.info("Cohere Chat: Attempting to use API key from config file") - logging.debug(f"Cohere Chat: Cohere API Key from config: {loaded_config_data['api_keys']['cohere']}") - cohere_api_key = loaded_config_data['api_keys']['cohere'] + logging.debug(f"Cohere Chat: Cohere API Key from config: {loaded_config_data['cohere_api']['api_key']}") + cohere_api_key = loaded_config_data['cohere_api']['api_key'] if cohere_api_key: logging.debug(f"Cohere Chat: Cohere API Key from config: {cohere_api_key[:3]}...{cohere_api_key[-3:]}") else: @@ -467,7 +505,7 @@ def chat_with_cohere(api_key, input_data, model=None, custom_prompt_arg=None, sy # Ensure model is set if not model: - model = loaded_config_data['models']['cohere'] + model = loaded_config_data['cohere_api']['model'] logging.debug(f"Cohere Chat: Using model: {model}") if temp is None: @@ -620,7 +658,7 @@ def chat_with_groq(api_key, input_data, custom_prompt_arg, 
temp=None, system_mes logging.info("Groq: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - groq_api_key = loaded_config_data['api_keys'].get('groq') + groq_api_key = loaded_config_data['groq_api'].get('api_Key') if groq_api_key: logging.info("Groq: Using API key from config file") else: @@ -634,6 +672,11 @@ def chat_with_groq(api_key, input_data, custom_prompt_arg, temp=None, system_mes logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}") + streaming = loaded_config_data['groq_api']['streaming'] + if streaming == "true" or "True": + streaming = True + else: + streaming = False # Transcript data handling & Validation if isinstance(input_data, str) and os.path.isfile(input_data): logging.debug("Groq: Loading json data for summarization") @@ -662,7 +705,7 @@ def chat_with_groq(api_key, input_data, custom_prompt_arg, temp=None, system_mes raise ValueError("Groq: Invalid input data format") # Set the model to be used - groq_model = loaded_config_data['models']['groq'] + groq_model = loaded_config_data['groq_api']['model'] if temp is None: temp = 0.2 @@ -772,7 +815,7 @@ def chat_with_openrouter(api_key, input_data, custom_prompt_arg, temp=None, syst logging.info("OpenRouter: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - openrouter_api_key = loaded_config_data['api_keys'].get('openrouter') + openrouter_api_key = loaded_config_data['openrouter_api'].get('api_key') if openrouter_api_key: logging.info("OpenRouter: Using API key from config file") else: @@ -781,7 +824,7 @@ def chat_with_openrouter(api_key, input_data, custom_prompt_arg, temp=None, syst # Model Selection validation logging.debug("OpenRouter: Validating model selection") loaded_config_data = load_and_log_configs() - openrouter_model = loaded_config_data['models']['openrouter'] + openrouter_model = loaded_config_data['openrouter_api']['model'] logging.debug(f"OpenRouter: Using model from config file: {openrouter_model}") # Final check to ensure we have a valid API key @@ -940,7 +983,7 @@ def chat_with_huggingface(api_key, input_data, custom_prompt_arg, system_prompt= logging.info("HuggingFace Chat: API key not provided as parameter") logging.info("HuggingFace Chat: Attempting to use API key from config file") - huggingface_api_key = loaded_config_data['api_keys'].get('huggingface') + huggingface_api_key = loaded_config_data['huggingface_api'].get('api_key') logging.debug(f"HuggingFace Chat: API key from config: {huggingface_api_key[:5]}...{huggingface_api_key[-5:]}") if huggingface_api_key is None or huggingface_api_key.strip() == "": @@ -953,7 +996,7 @@ def chat_with_huggingface(api_key, input_data, custom_prompt_arg, system_prompt= } # Setup model - huggingface_model = loaded_config_data['models']['huggingface'] + huggingface_model = loaded_config_data['huggingface_api']['model'] API_URL = f"https://api-inference.huggingface.co/models/{huggingface_model}/v1/chat/completions" if temp is None: @@ -1054,7 +1097,7 @@ def chat_with_deepseek(api_key, input_data, custom_prompt_arg, temp=0.1, system_ logging.info("DeepSeek: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - deepseek_api_key = loaded_config_data['api_keys'].get('deepseek') + deepseek_api_key = loaded_config_data['deepseek_api'].get('api_key') if deepseek_api_key and deepseek_api_key.strip(): deepseek_api_key = deepseek_api_key.strip() logging.info("DeepSeek: Using API key from config 
file") @@ -1110,7 +1153,7 @@ def chat_with_deepseek(api_key, input_data, custom_prompt_arg, temp=0.1, system_ raise ValueError("DeepSeek: Invalid input data format") # Retrieve the model from config if not provided - deepseek_model = loaded_config_data['models'].get('deepseek', "deepseek-chat") + deepseek_model = loaded_config_data['deepseek_api'].get('deepseek', "deepseek-chat") logging.debug(f"DeepSeek: Using model: {deepseek_model}") # Ensure temperature is a float within acceptable range @@ -1241,7 +1284,7 @@ def chat_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, system_ logging.info("Mistral: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - mistral_api_key = loaded_config_data['api_keys'].get('mistral') + mistral_api_key = loaded_config_data['mistral_api'].get('api_key') if mistral_api_key: logging.info("Mistral: Using API key from config file") else: @@ -1265,7 +1308,7 @@ def chat_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, system_ else: raise ValueError("Mistral: Invalid input data format") - mistral_model = loaded_config_data['models'].get('mistral', "mistral-large-latest") + mistral_model = loaded_config_data['mistral_api'].get('model', "mistral-large-latest") if temp is None: temp = 0.2 @@ -1375,7 +1418,7 @@ def chat_with_google(api_key, input_data, custom_prompt_arg, temp=None, system_m if not google_api_key: logging.info("Google: API key not provided as parameter") logging.info("Google: Attempting to use API key from config file") - google_api_key = loaded_config_data['api_keys']['google'] + google_api_key = loaded_config_data['google_api']['api_key'] if not google_api_key: logging.error("Google: API key not found or is empty") @@ -1428,7 +1471,7 @@ def chat_with_google(api_key, input_data, custom_prompt_arg, temp=None, system_m logging.debug(f"Google: Extracted text (first 500 chars): {text[:500]}...") logging.debug(f"Google: Custom prompt: {custom_prompt_arg}") - google_model = loaded_config_data['models']['google'] or "gemini-1.5-pro" + google_model = loaded_config_data['google_api']['model'] or "gemini-1.5-pro" logging.debug(f"Google: Using model: {google_model}") headers = { diff --git a/App_Function_Libraries/LLM_API_Calls_Local.py b/App_Function_Libraries/LLM_API_Calls_Local.py index 5bb721551..058190c4b 100644 --- a/App_Function_Libraries/LLM_API_Calls_Local.py +++ b/App_Function_Libraries/LLM_API_Calls_Local.py @@ -135,7 +135,7 @@ def chat_with_llama(input_data, custom_prompt, temp, api_url="http://127.0.0.1:8 if api_key is None: logging.info("llama.cpp: API key not provided as parameter") logging.info("llama.cpp: Attempting to use API key from config file") - api_key = loaded_config_data['api_keys']['llama'] + api_key = loaded_config_data['llama_api']['api_key'] if api_key is None or api_key.strip() == "": logging.info("llama.cpp: API key not found or is empty") @@ -145,7 +145,7 @@ def chat_with_llama(input_data, custom_prompt, temp, api_url="http://127.0.0.1:8 if api_url is None: logging.info("llama.cpp: API URL not provided as parameter") logging.info("llama.cpp: Attempting to use API URL from config file") - api_url = loaded_config_data['local_api_ip']['llama'] + api_url = loaded_config_data['llama_api']['api_ip'] if api_url is None or api_url.strip() == "": logging.info("llama.cpp: API URL not found or is empty") @@ -260,7 +260,7 @@ def chat_with_kobold(input_data, api_key, custom_prompt_input, kobold_api_ip="ht logging.info("Kobold: Using API key provided as parameter") 
else: # If no parameter is provided, use the key from the config - kobold_api_key = loaded_config_data['api_keys'].get('kobold') + kobold_api_key = loaded_config_data['kobold_api'].get('api_key') if kobold_api_key: logging.info("Kobold: Using API key from config file") else: @@ -314,14 +314,14 @@ def chat_with_kobold(input_data, api_key, custom_prompt_input, kobold_api_ip="ht logging.debug("kobold: Submitting request to API endpoint") print("kobold: Submitting request to API endpoint") - kobold_api_ip = loaded_config_data['local_api_ip']['kobold'] + kobold_api_ip = loaded_config_data['kobold_api']['api_ip'] if streaming: logging.debug("Kobold Summarization: Streaming mode enabled") try: # Send the request with streaming enabled # Get the Streaming API IP from the config - kobold_openai_api_IP = loaded_config_data['local_api_ip']['kobold_openai'] + kobold_openai_api_IP = loaded_config_data['kobold_api']['api_streaming_ip'] response = requests.post( kobold_openai_api_IP, headers=headers, json=data, stream=True ) @@ -421,7 +421,7 @@ def chat_with_oobabooga(input_data, api_key, custom_prompt, api_url="http://127. if api_key is None: logging.info("ooba: API key not provided as parameter") logging.info("ooba: Attempting to use API key from config file") - api_key = loaded_config_data['api_keys']['ooba'] + api_key = loaded_config_data['ooba_api']['api_key'] if api_key is None or api_key.strip() == "": logging.info("ooba: API key not found or is empty") @@ -540,15 +540,15 @@ def chat_with_tabbyapi( logging.info("TabbyAPI: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - tabby_api_key = loaded_config_data['api_keys'].get('tabby') + tabby_api_key = loaded_config_data['tabby_api'].get('api_key') if tabby_api_key: logging.info("TabbyAPI: Using API key from config file") else: logging.warning("TabbyAPI: No API key found in config file") # Set API IP and model from config.txt - tabby_api_ip = loaded_config_data['local_api_ip']['tabby'] - tabby_model = loaded_config_data['models']['tabby'] + tabby_api_ip = loaded_config_data['tabby_api']['api_ip'] + tabby_model = loaded_config_data['tabby_api']['model'] if temp is None: temp = 0.7 @@ -655,12 +655,12 @@ def chat_with_tabbyapi( # FIXME aphrodite engine - code was literally tab complete in one go from copilot... 
:/ def chat_with_aphrodite(input_data, custom_prompt_input, api_key=None, api_IP="http://127.0.0.1:8080/completion"): loaded_config_data = load_and_log_configs() - model = loaded_config_data['models']['aphrodite'] + model = loaded_config_data['aphrodite_api']['model'] # API key validation if api_key is None: logging.info("aphrodite: API key not provided as parameter") logging.info("aphrodite: Attempting to use API key from config file") - api_key = loaded_config_data['api_keys']['aphrodite'] + api_key = loaded_config_data['aphrodite_api']['api_key'] if api_key is None or api_key.strip() == "": logging.info("aphrodite: API key not found or is empty") @@ -707,7 +707,7 @@ def chat_with_ollama( logging.info("Ollama: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - ollama_api_key = loaded_config_data['api_keys'].get('ollama') + ollama_api_key = loaded_config_data['ollama_api'].get('api_key') if ollama_api_key: logging.info("Ollama: Using API key from config file") else: @@ -715,14 +715,14 @@ def chat_with_ollama( # Set model from parameter or config if model is None: - model = loaded_config_data['models'].get('ollama') + model = loaded_config_data['ollama_api'].get('model') if model is None: logging.error("Ollama: Model not found in config file") return "Ollama: Model not found in config file" # Set api_url from parameter or config if api_url is None: - api_url = loaded_config_data['local_api_ip'].get('ollama') + api_url = loaded_config_data['ollama_api'].get('api_ip') if api_url is None: logging.error("Ollama: API URL not found in config file") return "Ollama: API URL not found in config file" @@ -910,13 +910,13 @@ def chat_with_vllm( logging.info("vLLM: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - vllm_api_key = loaded_config_data['api_keys'].get('vllm') + vllm_api_key = loaded_config_data['vllm_api'].get('api_key') if vllm_api_key: logging.info("vLLM: Using API key from config file") else: logging.warning("vLLM: No API key found in config file") - if 'vllm' in loaded_config_data['local_api_ip']: - vllm_api_url = loaded_config_data['local_api_ip']['vllm'] + if 'api_ip' in loaded_config_data['vllm_api']: + vllm_api_url = loaded_config_data['vllm_api']['api_ip'] logging.info(f"vLLM: Using API URL from config file: {vllm_api_url}") else: logging.error("vLLM: API URL not found in config file") @@ -927,7 +927,7 @@ def chat_with_vllm( if system_prompt is None: system_prompt = "You are a helpful AI assistant." - model = model or loaded_config_data['models']['vllm'] + model = model or loaded_config_data['vllm_api']['model'] if system_prompt is None: system_prompt = "You are a helpful AI assistant." 
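Every hunk in LLM_API_Calls.py and LLM_API_Calls_Local.py above follows the same migration: provider settings move out of the shared `api_keys`, `models`, and `local_api_ip` tables and into a per-provider `<name>_api` section with `api_key`, `model`, `api_ip`, and, where streaming is supported, `streaming` / `api_streaming_ip` keys. A minimal sketch of reading such a section defensively, assuming `load_and_log_configs()` returns a plain nested dict and that values parsed from config.txt (including boolean flags) arrive as strings; the helper names below are illustrative only, not part of the codebase:

def get_provider_setting(loaded_config_data, provider, key, default=None):
    # e.g. get_provider_setting(cfg, "kobold", "api_streaming_ip")
    section = loaded_config_data.get(f"{provider}_api") or {}
    return section.get(key, default)

def get_provider_flag(loaded_config_data, provider, key, default=False):
    # config.txt values come back as strings, so compare explicitly rather than
    # relying on truthiness (the string "false" is still truthy in Python).
    value = get_provider_setting(loaded_config_data, provider, key, default)
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value)

# Hypothetical usage mirroring the keys used in these hunks:
#   vllm_api_key = get_provider_setting(loaded_config_data, "vllm", "api_key")
#   streaming    = get_provider_flag(loaded_config_data, "groq", "streaming")
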
@@ -1015,7 +1015,7 @@ def chat_with_custom_openai(api_key, input_data, custom_prompt_arg, temp=None, s if not custom_openai_api_key: logging.info("Custom OpenAI API: API key not provided as parameter") logging.info("Custom OpenAI API: Attempting to use API key from config file") - custom_openai_api_key = loaded_config_data['api_keys']['custom_openai_api_key'] + custom_openai_api_key = loaded_config_data['custom_openai_api']['api_key'] if not custom_openai_api_key: logging.error("Custom OpenAI API: API key not found or is empty") @@ -1068,7 +1068,7 @@ def chat_with_custom_openai(api_key, input_data, custom_prompt_arg, temp=None, s logging.debug(f"Custom OpenAI API: Extracted text (first 500 chars): {text[:500]}...") logging.debug(f"v: Custom prompt: {custom_prompt_arg}") - openai_model = loaded_config_data['models']['openai'] or "gpt-4o" + openai_model = loaded_config_data['custom_openai_api']['model'] logging.debug(f"Custom OpenAI API: Using model: {openai_model}") headers = { @@ -1096,7 +1096,7 @@ def chat_with_custom_openai(api_key, input_data, custom_prompt_arg, temp=None, s "stream": streaming } - custom_openai_url = loaded_config_data['Local_api_ip']['custom_openai_api_ip'] + custom_openai_url = loaded_config_data['custom_openai_api']['api_ip'] if streaming: response = requests.post( diff --git a/App_Function_Libraries/Local_LLM/Local_LLM_huggingface.py b/App_Function_Libraries/Local_LLM/Local_LLM_huggingface.py index da40f48fd..1ca29b6c7 100644 --- a/App_Function_Libraries/Local_LLM/Local_LLM_huggingface.py +++ b/App_Function_Libraries/Local_LLM/Local_LLM_huggingface.py @@ -1,8 +1,60 @@ -# import gradio as gr -# from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline -# import os -# import torch -# +# Local_LLM_huggingface.py +# Description: This file contains the functions that are used for performing inference with and managing Hugging Face Transformers models +# +# Imports +import os +# 3rd-Party Imports +from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline +import torch +# Local Imports +# +####################################################################################################################### +# +# Functions: + +# FIXME: This function is not complete +# Setup proper path/configurations for the models +HF_MODELS_DIR = "models" + +# FIXME: This function is not complete +def get_local_models(): + if not os.path.exists(HF_MODELS_DIR): + os.makedirs(HF_MODELS_DIR) + return [d for d in os.listdir(HF_MODELS_DIR) if os.path.isdir(os.path.join(HF_MODELS_DIR, d))] + + +def chat_with_transformers(user_message, system_message, model_name=None, model_path=None, max_new_tokens=100): + pass + +# Prepare the input as before +chat = [ + {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."}, + {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"} +] + +# 1: Load the model and tokenizer +model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16) +tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + +# 2: Apply the chat template +formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True) +print("Formatted chat:\n", formatted_chat) + +# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True) +inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False) +# Move the tokenized inputs 
to the same device the model is on (GPU/CPU) +inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()} +print("Tokenized inputs:\n", inputs) + +# 4: Generate text from the model +outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1) +print("Generated tokens:\n", outputs) + +# 5: Decode the output back to a string +decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True) +print("Decoded output:\n", decoded_output) + + # # Assuming models are stored in a 'models' directory # MODELS_DIR = "models" # diff --git a/App_Function_Libraries/Prompt_Engineering/Prompt_Engineering.py b/App_Function_Libraries/Prompt_Engineering/Prompt_Engineering.py index c037eeadb..c1b6a5d69 100644 --- a/App_Function_Libraries/Prompt_Engineering/Prompt_Engineering.py +++ b/App_Function_Libraries/Prompt_Engineering/Prompt_Engineering.py @@ -14,6 +14,9 @@ # # Function Definitions + +################################## Meta Prompt Engineering Functions ############################################## + # Function to generate prompt using metaprompt def generate_prompt(api_endpoint, api_key, task, variables_str, temperature): # Convert variables into a list from comma-separated input @@ -492,7 +495,7 @@ def generate_prompt(api_endpoint, api_key, task, variables_str, temperature): # Call chat API to generate the prompt response = chat_api_call(api_endpoint=api_endpoint, api_key=api_key, input_data="", prompt=metaprompt, - temp=temperature) + temp=temperature, streaming=False, minp=None, maxp=None, model=None) return response def extract_between_tags(tag: str, string: str, strip: bool = False) -> list[str]: @@ -537,7 +540,7 @@ def test_generated_prompt(api_endpoint, api_key, generated_prompt, variable_valu prompt_with_values = prompt_with_values.replace(var, value) # Send the filled-in prompt to the chat API - response = chat_api_call(api_endpoint=api_endpoint, api_key=api_key, input_data="", prompt=prompt_with_values, temp=temperature) + response = chat_api_call(api_endpoint=api_endpoint, api_key=api_key, input_data="", prompt=prompt_with_values, temp=temperature, system_message=None, streaming=False, minp=None, maxp=None, model=None) return response # diff --git a/App_Function_Libraries/RAG/RAG_Examples.md b/App_Function_Libraries/RAG/RAG_Examples.md deleted file mode 100644 index 0ca8b3936..000000000 --- a/App_Function_Libraries/RAG/RAG_Examples.md +++ /dev/null @@ -1,556 +0,0 @@ - -``` -################################################################################################################## -# RAG Pipeline 1 -# 0.62 0.61 0.75 63402.0 -# from langchain_openai import ChatOpenAI -# -# from langchain_community.document_loaders import WebBaseLoader -# from langchain_openai import OpenAIEmbeddings -# from langchain.text_splitter import RecursiveCharacterTextSplitter -# from langchain_chroma import Chroma -# -# from langchain_community.retrievers import BM25Retriever -# from langchain.retrievers import ParentDocumentRetriever -# from langchain.storage import InMemoryStore -# import os -# from operator import itemgetter -# from langchain import hub -# from langchain_core.output_parsers import StrOutputParser -# from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda -# from langchain.retrievers import MergerRetriever -# from langchain.retrievers.document_compressors import DocumentCompressorPipeline - - -# def rag_pipeline(): -# try: -# def format_docs(docs): -# return "\n".join(doc.page_content for doc in 
docs) -# -# llm = ChatOpenAI(model='gpt-4o-mini') -# -# loader = WebBaseLoader('https://en.wikipedia.org/wiki/European_debt_crisis') -# docs = loader.load() -# -# embedding = OpenAIEmbeddings(model='text-embedding-3-large') -# -# splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200) -# splits = splitter.split_documents(docs) -# c = Chroma.from_documents(documents=splits, embedding=embedding, -# collection_name='testindex-ragbuilder-1724657573', ) -# retrievers = [] -# retriever = c.as_retriever(search_type='mmr', search_kwargs={'k': 10}) -# retrievers.append(retriever) -# retriever = BM25Retriever.from_documents(docs) -# retrievers.append(retriever) -# -# parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=600) -# splits = parent_splitter.split_documents(docs) -# store = InMemoryStore() -# retriever = ParentDocumentRetriever(vectorstore=c, docstore=store, child_splitter=splitter, -# parent_splitter=parent_splitter) -# retriever.add_documents(docs) -# retrievers.append(retriever) -# retriever = MergerRetriever(retrievers=retrievers) -# prompt = hub.pull("rlm/rag-prompt") -# rag_chain = ( -# RunnableParallel(context=retriever, question=RunnablePassthrough()) -# .assign(context=itemgetter("context") | RunnableLambda(format_docs)) -# .assign(answer=prompt | llm | StrOutputParser()) -# .pick(["answer", "context"])) -# return rag_chain -# except Exception as e: -# print(f"An error occurred: {e}") - - -# To get the answer and context, use the following code -# res=rag_pipeline().invoke("your prompt here") -# print(res["answer"]) -# print(res["context"]) - -############################################################################################################ - - -############################################################################################################ -# RAG Pipeline 2 - -# 0.6 0.73 0.68 3125.0 -# from langchain_openai import ChatOpenAI -# -# from langchain_community.document_loaders import WebBaseLoader -# from langchain_openai import OpenAIEmbeddings -# from langchain.text_splitter import RecursiveCharacterTextSplitter -# from langchain_chroma import Chroma -# from langchain.retrievers.multi_query import MultiQueryRetriever -# from langchain.retrievers import ParentDocumentRetriever -# from langchain.storage import InMemoryStore -# from langchain_community.document_transformers import EmbeddingsRedundantFilter -# from langchain.retrievers.document_compressors import LLMChainFilter -# from langchain.retrievers.document_compressors import EmbeddingsFilter -# from langchain.retrievers import ContextualCompressionRetriever -# import os -# from operator import itemgetter -# from langchain import hub -# from langchain_core.output_parsers import StrOutputParser -# from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda -# from langchain.retrievers import MergerRetriever -# from langchain.retrievers.document_compressors import DocumentCompressorPipeline - - -# def rag_pipeline(): -# try: -# def format_docs(docs): -# return "\n".join(doc.page_content for doc in docs) -# -# llm = ChatOpenAI(model='gpt-4o-mini') -# -# loader = WebBaseLoader('https://en.wikipedia.org/wiki/European_debt_crisis') -# docs = loader.load() -# -# embedding = OpenAIEmbeddings(model='text-embedding-3-large') -# -# splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200) -# splits = splitter.split_documents(docs) -# c = Chroma.from_documents(documents=splits, embedding=embedding, -# 
collection_name='testindex-ragbuilder-1724650962', ) -# retrievers = [] -# retriever = MultiQueryRetriever.from_llm(c.as_retriever(search_type='similarity', search_kwargs={'k': 10}), -# llm=llm) -# retrievers.append(retriever) -# -# parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=600) -# splits = parent_splitter.split_documents(docs) -# store = InMemoryStore() -# retriever = ParentDocumentRetriever(vectorstore=c, docstore=store, child_splitter=splitter, -# parent_splitter=parent_splitter) -# retriever.add_documents(docs) -# retrievers.append(retriever) -# retriever = MergerRetriever(retrievers=retrievers) -# arr_comp = [] -# arr_comp.append(EmbeddingsRedundantFilter(embeddings=embedding)) -# arr_comp.append(LLMChainFilter.from_llm(llm)) -# pipeline_compressor = DocumentCompressorPipeline(transformers=arr_comp) -# retriever = ContextualCompressionRetriever(base_retriever=retriever, base_compressor=pipeline_compressor) -# prompt = hub.pull("rlm/rag-prompt") -# rag_chain = ( -# RunnableParallel(context=retriever, question=RunnablePassthrough()) -# .assign(context=itemgetter("context") | RunnableLambda(format_docs)) -# .assign(answer=prompt | llm | StrOutputParser()) -# .pick(["answer", "context"])) -# return rag_chain -# except Exception as e: -# print(f"An error occurred: {e}") - - -# To get the answer and context, use the following code -# res=rag_pipeline().invoke("your prompt here") -# print(res["answer"]) -# print(res["context"]) - -# -# -# -############################################################################################################ -# Plain bm25 retriever -# class BM25Retriever(BaseRetriever): -# """`BM25` retriever without Elasticsearch.""" -# -# vectorizer: Any -# """ BM25 vectorizer.""" -# docs: List[Document] = Field(repr=False) -# """ List of documents.""" -# k: int = 4 -# """ Number of documents to return.""" -# preprocess_func: Callable[[str], List[str]] = default_preprocessing_func -# """ Preprocessing function to use on the text before BM25 vectorization.""" -# -# class Config: -# arbitrary_types_allowed = True -# -# @classmethod -# def from_texts( -# cls, -# texts: Iterable[str], -# metadatas: Optional[Iterable[dict]] = None, -# bm25_params: Optional[Dict[str, Any]] = None, -# preprocess_func: Callable[[str], List[str]] = default_preprocessing_func, -# **kwargs: Any, -# ) -> BM25Retriever: -# """ -# Create a BM25Retriever from a list of texts. -# Args: -# texts: A list of texts to vectorize. -# metadatas: A list of metadata dicts to associate with each text. -# bm25_params: Parameters to pass to the BM25 vectorizer. -# preprocess_func: A function to preprocess each text before vectorization. -# **kwargs: Any other arguments to pass to the retriever. -# -# Returns: -# A BM25Retriever instance. -# """ -# try: -# from rank_bm25 import BM25Okapi -# except ImportError: -# raise ImportError( -# "Could not import rank_bm25, please install with `pip install " -# "rank_bm25`." 
-# ) -# -# texts_processed = [preprocess_func(t) for t in texts] -# bm25_params = bm25_params or {} -# vectorizer = BM25Okapi(texts_processed, **bm25_params) -# metadatas = metadatas or ({} for _ in texts) -# docs = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadatas)] -# return cls( -# vectorizer=vectorizer, docs=docs, preprocess_func=preprocess_func, **kwargs -# ) -# -# @classmethod -# def from_documents( -# cls, -# documents: Iterable[Document], -# *, -# bm25_params: Optional[Dict[str, Any]] = None, -# preprocess_func: Callable[[str], List[str]] = default_preprocessing_func, -# **kwargs: Any, -# ) -> BM25Retriever: -# """ -# Create a BM25Retriever from a list of Documents. -# Args: -# documents: A list of Documents to vectorize. -# bm25_params: Parameters to pass to the BM25 vectorizer. -# preprocess_func: A function to preprocess each text before vectorization. -# **kwargs: Any other arguments to pass to the retriever. -# -# Returns: -# A BM25Retriever instance. -# """ -# texts, metadatas = zip(*((d.page_content, d.metadata) for d in documents)) -# return cls.from_texts( -# texts=texts, -# bm25_params=bm25_params, -# metadatas=metadatas, -# preprocess_func=preprocess_func, -# **kwargs, -# ) -# -# def _get_relevant_documents( -# self, query: str, *, run_manager: CallbackManagerForRetrieverRun -# ) -> List[Document]: -# processed_query = self.preprocess_func(query) -# return_docs = self.vectorizer.get_top_n(processed_query, self.docs, n=self.k) -# return return_docs -############################################################################################################ - -############################################################################################################ -# ElasticSearch BM25 Retriever -# class ElasticSearchBM25Retriever(BaseRetriever): -# """`Elasticsearch` retriever that uses `BM25`. -# -# To connect to an Elasticsearch instance that requires login credentials, -# including Elastic Cloud, use the Elasticsearch URL format -# https://username:password@es_host:9243. For example, to connect to Elastic -# Cloud, create the Elasticsearch URL with the required authentication details and -# pass it to the ElasticVectorSearch constructor as the named parameter -# elasticsearch_url. -# -# You can obtain your Elastic Cloud URL and login credentials by logging in to the -# Elastic Cloud console at https://cloud.elastic.co, selecting your deployment, and -# navigating to the "Deployments" page. -# -# To obtain your Elastic Cloud password for the default "elastic" user: -# -# 1. Log in to the Elastic Cloud console at https://cloud.elastic.co -# 2. Go to "Security" > "Users" -# 3. Locate the "elastic" user and click "Edit" -# 4. Click "Reset password" -# 5. Follow the prompts to reset the password -# -# The format for Elastic Cloud URLs is -# https://username:password@cluster_id.region_id.gcp.cloud.es.io:9243. -# """ -# -# client: Any -# """Elasticsearch client.""" -# index_name: str -# """Name of the index to use in Elasticsearch.""" -# -# @classmethod -# def create( -# cls, elasticsearch_url: str, index_name: str, k1: float = 2.0, b: float = 0.75 -# ) -> ElasticSearchBM25Retriever: -# """ -# Create a ElasticSearchBM25Retriever from a list of texts. -# -# Args: -# elasticsearch_url: URL of the Elasticsearch instance to connect to. -# index_name: Name of the index to use in Elasticsearch. -# k1: BM25 parameter k1. -# b: BM25 parameter b. 
-# -# Returns: -# -# """ -# from elasticsearch import Elasticsearch -# -# # Create an Elasticsearch client instance -# es = Elasticsearch(elasticsearch_url) -# -# # Define the index settings and mappings -# settings = { -# "analysis": {"analyzer": {"default": {"type": "standard"}}}, -# "similarity": { -# "custom_bm25": { -# "type": "BM25", -# "k1": k1, -# "b": b, -# } -# }, -# } -# mappings = { -# "properties": { -# "content": { -# "type": "text", -# "similarity": "custom_bm25", # Use the custom BM25 similarity -# } -# } -# } -# -# # Create the index with the specified settings and mappings -# es.indices.create(index=index_name, mappings=mappings, settings=settings) -# return cls(client=es, index_name=index_name) -# -# def add_texts( -# self, -# texts: Iterable[str], -# refresh_indices: bool = True, -# ) -> List[str]: -# """Run more texts through the embeddings and add to the retriever. -# -# Args: -# texts: Iterable of strings to add to the retriever. -# refresh_indices: bool to refresh ElasticSearch indices -# -# Returns: -# List of ids from adding the texts into the retriever. -# """ -# try: -# from elasticsearch.helpers import bulk -# except ImportError: -# raise ImportError( -# "Could not import elasticsearch python package. " -# "Please install it with `pip install elasticsearch`." -# ) -# requests = [] -# ids = [] -# for i, text in enumerate(texts): -# _id = str(uuid.uuid4()) -# request = { -# "_op_type": "index", -# "_index": self.index_name, -# "content": text, -# "_id": _id, -# } -# ids.append(_id) -# requests.append(request) -# bulk(self.client, requests) -# -# if refresh_indices: -# self.client.indices.refresh(index=self.index_name) -# return ids -# -# def _get_relevant_documents( -# self, query: str, *, run_manager: CallbackManagerForRetrieverRun -# ) -> List[Document]: -# query_dict = {"query": {"match": {"content": query}}} -# res = self.client.search(index=self.index_name, body=query_dict) -# -# docs = [] -# for r in res["hits"]["hits"]: -# docs.append(Document(page_content=r["_source"]["content"])) -# return docs -############################################################################################################ - - -############################################################################################################ -# Multi Query Retriever -# class MultiQueryRetriever(BaseRetriever): -# """Given a query, use an LLM to write a set of queries. -# -# Retrieve docs for each query. Return the unique union of all retrieved docs. -# """ -# -# retriever: BaseRetriever -# llm_chain: Runnable -# verbose: bool = True -# parser_key: str = "lines" -# """DEPRECATED. parser_key is no longer used and should not be specified.""" -# include_original: bool = False -# """Whether to include the original query in the list of generated queries.""" -# -# @classmethod -# def from_llm( -# cls, -# retriever: BaseRetriever, -# llm: BaseLanguageModel, -# prompt: BasePromptTemplate = DEFAULT_QUERY_PROMPT, -# parser_key: Optional[str] = None, -# include_original: bool = False, -# ) -> "MultiQueryRetriever": -# """Initialize from llm using default template. -# -# Args: -# retriever: retriever to query documents from -# llm: llm for query generation using DEFAULT_QUERY_PROMPT -# prompt: The prompt which aims to generate several different versions -# of the given user query -# include_original: Whether to include the original query in the list of -# generated queries. 
-# -# Returns: -# MultiQueryRetriever -# """ -# output_parser = LineListOutputParser() -# llm_chain = prompt | llm | output_parser -# return cls( -# retriever=retriever, -# llm_chain=llm_chain, -# include_original=include_original, -# ) -# -# async def _aget_relevant_documents( -# self, -# query: str, -# *, -# run_manager: AsyncCallbackManagerForRetrieverRun, -# ) -> List[Document]: -# """Get relevant documents given a user query. -# -# Args: -# query: user query -# -# Returns: -# Unique union of relevant documents from all generated queries -# """ -# queries = await self.agenerate_queries(query, run_manager) -# if self.include_original: -# queries.append(query) -# documents = await self.aretrieve_documents(queries, run_manager) -# return self.unique_union(documents) -# -# async def agenerate_queries( -# self, question: str, run_manager: AsyncCallbackManagerForRetrieverRun -# ) -> List[str]: -# """Generate queries based upon user input. -# -# Args: -# question: user query -# -# Returns: -# List of LLM generated queries that are similar to the user input -# """ -# response = await self.llm_chain.ainvoke( -# {"question": question}, config={"callbacks": run_manager.get_child()} -# ) -# if isinstance(self.llm_chain, LLMChain): -# lines = response["text"] -# else: -# lines = response -# if self.verbose: -# logger.info(f"Generated queries: {lines}") -# return lines -# -# async def aretrieve_documents( -# self, queries: List[str], run_manager: AsyncCallbackManagerForRetrieverRun -# ) -> List[Document]: -# """Run all LLM generated queries. -# -# Args: -# queries: query list -# -# Returns: -# List of retrieved Documents -# """ -# document_lists = await asyncio.gather( -# *( -# self.retriever.ainvoke( -# query, config={"callbacks": run_manager.get_child()} -# ) -# for query in queries -# ) -# ) -# return [doc for docs in document_lists for doc in docs] -# -# def _get_relevant_documents( -# self, -# query: str, -# *, -# run_manager: CallbackManagerForRetrieverRun, -# ) -> List[Document]: -# """Get relevant documents given a user query. -# -# Args: -# query: user query -# -# Returns: -# Unique union of relevant documents from all generated queries -# """ -# queries = self.generate_queries(query, run_manager) -# if self.include_original: -# queries.append(query) -# documents = self.retrieve_documents(queries, run_manager) -# return self.unique_union(documents) -# -# def generate_queries( -# self, question: str, run_manager: CallbackManagerForRetrieverRun -# ) -> List[str]: -# """Generate queries based upon user input. -# -# Args: -# question: user query -# -# Returns: -# List of LLM generated queries that are similar to the user input -# """ -# response = self.llm_chain.invoke( -# {"question": question}, config={"callbacks": run_manager.get_child()} -# ) -# if isinstance(self.llm_chain, LLMChain): -# lines = response["text"] -# else: -# lines = response -# if self.verbose: -# logger.info(f"Generated queries: {lines}") -# return lines -# -# def retrieve_documents( -# self, queries: List[str], run_manager: CallbackManagerForRetrieverRun -# ) -> List[Document]: -# """Run all LLM generated queries. -# -# Args: -# queries: query list -# -# Returns: -# List of retrieved Documents -# """ -# documents = [] -# for query in queries: -# docs = self.retriever.invoke( -# query, config={"callbacks": run_manager.get_child()} -# ) -# documents.extend(docs) -# return documents -# -# def unique_union(self, documents: List[Document]) -> List[Document]: -# """Get unique Documents. 
-# -# Args: -# documents: List of retrieved Documents -# -# Returns: -# List of unique retrieved Documents -# """ -# return _unique_documents(documents) -############################################################################################################ -``` \ No newline at end of file diff --git a/App_Function_Libraries/Researcher/Research_Manager.py b/App_Function_Libraries/Researcher/Research_Manager.py new file mode 100644 index 000000000..a31f61ed3 --- /dev/null +++ b/App_Function_Libraries/Researcher/Research_Manager.py @@ -0,0 +1,24 @@ +# Research_Manager.py +# +# Description: This file contains functions related to the research manager functionality +# Handles all functionality related to 'research', including the management of 'research' data, the retrieval of +# 'research' data, and final creation of 'research' reports. +# +# Imports +import os +# +# 3rd-party Libraries +# +# Local Imports +# +######################################################################################################################## +# +# Functions: + + + + + +# +# End of Research_Manager.py +####################################################################################################################### diff --git a/App_Function_Libraries/Researcher/__init__.py b/App_Function_Libraries/Researcher/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App_Function_Libraries/STT/STT.py b/App_Function_Libraries/STT/STT.py new file mode 100644 index 000000000..1541360bf --- /dev/null +++ b/App_Function_Libraries/STT/STT.py @@ -0,0 +1,21 @@ +# STT.py +# Description: This file contains the functions to convert speech to text +# +# Imports +# +# External Imports +# +# Local Imports +# +# +####################################################################################################################### +# +# Functions: + + + +#https://github.com/QwenLM/Qwen2-Audio + +# +# End of STT.py +####################################################################################################################### diff --git a/App_Function_Libraries/STT/__init__.py b/App_Function_Libraries/STT/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App_Function_Libraries/Summarization/Summarization_General_Lib.py b/App_Function_Libraries/Summarization/Summarization_General_Lib.py index f6dd46c9d..feeb1e9c0 100644 --- a/App_Function_Libraries/Summarization/Summarization_General_Lib.py +++ b/App_Function_Libraries/Summarization/Summarization_General_Lib.py @@ -122,13 +122,15 @@ def extract_text_from_segments(segments): def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None, streaming=False): - loaded_config_data = load_and_log_configs() + try: # API key validation if not api_key or api_key.strip() == "": - logging.info("OpenAI: #1 API key not provided as parameter") - logging.info("OpenAI: Attempting to use API key from config file") - api_key = loaded_config_data['api_keys']['openai'] + logging.info("OpenAI Summarize: API key not provided as parameter") + logging.info("OpenAI Summarize: Attempting to use API key from config file") + loaded_config_data = load_and_log_configs() + loaded_config_data.get('openai_api', {}).get('api_key', "") + logging.debug(f"OpenAI Summarize: Using API key from config file: {api_key[:5]}...{api_key[-5:]}") if not api_key or api_key.strip() == "": logging.error("OpenAI: #2 API key not found or is empty") @@ -182,7 +184,7 @@ def summarize_with_openai(api_key, input_data, 
custom_prompt_arg, temp=None, sys logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...") logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}") - openai_model = loaded_config_data['models']['openai'] or "gpt-4o" + openai_model = loaded_config_data['openai_api']['model'] or "gpt-4o" logging.debug(f"OpenAI: Using model: {openai_model}") headers = { @@ -285,7 +287,7 @@ def summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp=None, logging.info("Anthropic: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - anthropic_api_key = loaded_config_data['api_keys'].get('anthropic') + anthropic_api_key = loaded_config_data['anthropic_api'].get('api_key') if anthropic_api_key: logging.info("Anthropic: Using API key from config file") else: @@ -344,7 +346,7 @@ def summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp=None, "content": f"{text} \n\n\n\n{anthropic_prompt}" } - model = loaded_config_data['models']['anthropic'] + model = loaded_config_data['anthropic_api']['model'] data = { "model": model, @@ -462,7 +464,7 @@ def summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp=None, sys logging.info("Cohere: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - cohere_api_key = loaded_config_data['api_keys'].get('cohere') + cohere_api_key = loaded_config_data['cohere_api'].get('api_key') if cohere_api_key: logging.info("Cohere: Using API key from config file") else: @@ -507,7 +509,7 @@ def summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp=None, sys else: raise ValueError("Cohere: Invalid input data format") - cohere_model = loaded_config_data['models']['cohere'] + cohere_model = loaded_config_data['cohere']['model'] if temp is None: temp = 0.3 @@ -612,7 +614,7 @@ def summarize_with_groq(api_key, input_data, custom_prompt_arg, temp=None, syste logging.info("Groq: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - groq_api_key = loaded_config_data['api_keys'].get('groq') + groq_api_key = loaded_config_data['groq_api'].get('api_key') if groq_api_key: logging.info("Groq: Using API key from config file") else: @@ -652,7 +654,7 @@ def summarize_with_groq(api_key, input_data, custom_prompt_arg, temp=None, syste raise ValueError("Groq: Invalid input data format") # Set the model to be used - groq_model = loaded_config_data['models']['groq'] + groq_model = loaded_config_data['groq_api']['model'] if temp is None: temp = 0.2 @@ -762,7 +764,7 @@ def summarize_with_openrouter(api_key, input_data, custom_prompt_arg, temp=None, logging.info("OpenRouter: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - openrouter_api_key = loaded_config_data['api_keys'].get('openrouter') + openrouter_api_key = loaded_config_data['openrouter_api'].get('api_key') if openrouter_api_key: logging.info("OpenRouter: Using API key from config file") else: @@ -771,7 +773,7 @@ def summarize_with_openrouter(api_key, input_data, custom_prompt_arg, temp=None, # Model Selection validation logging.debug("OpenRouter: Validating model selection") loaded_config_data = load_and_log_configs() - openrouter_model = loaded_config_data['models']['openrouter'] + openrouter_model = loaded_config_data['openrouter_api']['model'] logging.debug(f"OpenRouter: Using model from config file: {openrouter_model}") # Final check to ensure we have a valid API key @@ -937,7 
+939,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None logging.info("HuggingFace: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - huggingface_api_key = loaded_config_data['api_keys'].get('huggingface') + huggingface_api_key = loaded_config_data['huggingface_api'].get('api_key') logging.debug(f"HuggingFace: API key from config: {huggingface_api_key[:5]}...{huggingface_api_key[-5:]}") if huggingface_api_key: logging.info("HuggingFace: Using API key from config file") @@ -982,7 +984,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None headers = { "Authorization": f"Bearer {huggingface_api_key}" } - huggingface_model = loaded_config_data['models']['huggingface'] + huggingface_model = loaded_config_data['huggingface_api']['model'] API_URL = f"https://api-inference.huggingface.co/models/{huggingface_model}" if temp is None: temp = 0.1 @@ -1068,7 +1070,7 @@ def summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp=None, s logging.info("DeepSeek: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - deepseek_api_key = loaded_config_data['api_keys'].get('deepseek') + deepseek_api_key = loaded_config_data['deepseek_api'].get('api_key') if deepseek_api_key: logging.info("DeepSeek: Using API key from config file") else: @@ -1107,7 +1109,7 @@ def summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp=None, s else: raise ValueError("DeepSeek: Invalid input data format") - deepseek_model = loaded_config_data['models']['deepseek'] or "deepseek-chat" + deepseek_model = loaded_config_data['deepseek']['model'] or "deepseek-chat" if temp is None: temp = 0.1 @@ -1205,7 +1207,7 @@ def summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, sy logging.info("Mistral: Using API key provided as parameter") else: # If no parameter is provided, use the key from the config - mistral_api_key = loaded_config_data['api_keys'].get('mistral') + mistral_api_key = loaded_config_data['mistral_api'].get('api_key') if mistral_api_key: logging.info("Mistral: Using API key from config file") else: @@ -1244,7 +1246,7 @@ def summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, sy else: raise ValueError("Mistral: Invalid input data format") - mistral_model = loaded_config_data['models']['mistral'] or "mistral-large-latest" + mistral_model = loaded_config_data['mistral_api']['model'] or "mistral-large-latest" if temp is None: temp = 0.2 @@ -1349,7 +1351,7 @@ def summarize_with_google(api_key, input_data, custom_prompt_arg, temp=None, sys if not api_key or api_key.strip() == "": logging.info("Google: #1 API key not provided as parameter") logging.info("Google: Attempting to use API key from config file") - api_key = loaded_config_data['api_keys']['google'] + api_key = loaded_config_data['google_api']['api_key'] if not api_key or api_key.strip() == "": logging.error("Google: #2 API key not found or is empty") @@ -1403,7 +1405,7 @@ def summarize_with_google(api_key, input_data, custom_prompt_arg, temp=None, sys logging.debug(f"Google: Extracted text (first 500 chars): {text[:500]}...") logging.debug(f"Google: Custom prompt: {custom_prompt_arg}") - google_model = loaded_config_data['models']['google'] or "gemini-1.5-pro" + google_model = loaded_config_data['google_api']['model'] or "gemini-1.5-pro" logging.debug(f"Google: Using model: {google_model}") headers = { diff --git 
a/App_Function_Libraries/TTS/TTS.py b/App_Function_Libraries/TTS/TTS.py new file mode 100644 index 000000000..5bba51de2 --- /dev/null +++ b/App_Function_Libraries/TTS/TTS.py @@ -0,0 +1,497 @@ +# TTS.py +# Description: This file contains the functions to convert text-to-speech +# +# Imports + +import os +from pydub import AudioSegment +import re +import tempfile +from typing import List, Tuple, Optional +# External Imports +# +# Local Imports +# +####################################################################################################################### +# +# Functions: + + +####################################################### +# +# TTS Provider Check Functions + +tts_providers = ["elevenlabs", "openai", "edge", "google", "sovitts", "qwen"] + +def test_all_tts_providers(): + try: + # Load configuration + #config = load_config() + + # Override default TTS model to use edge for tests + test_config = {"text_to_speech": {"default_tts_model": "edge"}} + + # Read input text from file + with open( + "tests/data/transcript_336aa9f955cd4019bc1287379a5a2820.txt", "r" + ) as file: + input_text = file.read() + + # Test ElevenLabs + tts_elevenlabs = TextToSpeech(model="elevenlabs") + elevenlabs_output_file = "tests/data/response_elevenlabs.mp3" + tts_elevenlabs.convert_to_speech(input_text, elevenlabs_output_file) + logging.info( + f"ElevenLabs TTS completed. Output saved to {elevenlabs_output_file}" + ) + + # Test OpenAI + tts_openai = TextToSpeech(model="openai") + openai_output_file = "tests/data/response_openai.mp3" + tts_openai.convert_to_speech(input_text, openai_output_file) + logging.info(f"OpenAI TTS completed. Output saved to {openai_output_file}") + + # Test Edge + tts_edge = TextToSpeech(model="edge") + edge_output_file = "tests/data/response_edge.mp3" + tts_edge.convert_to_speech(input_text, edge_output_file) + logging.info(f"Edge TTS completed. Output saved to {edge_output_file}") + + # Test Google + tts_google = TextToSpeech(model="google") + google_output_file = "tests/data/response_google.mp3" + tts_google.convert_to_speech(input_text, google_output_file) + logging.info(f"Google TTS completed. Output saved to {google_output_file}") + + # Test Sovi TTS + tts_sovitts = TextToSpeech(model="sovitts") + sovitts_output_file = "tests/data/response_sovitts.mp3" + tts_sovitts.convert_to_speech(input_text, sovitts_output_file) + logging.info(f"Sovi TTS completed. Output saved to {sovitts_output_file}") + + # Test Qwen TTS + tts_qwen = TextToSpeech(model="qwen") + qwen_output_file = "tests/data/response_qwen.mp3" + tts_qwen.convert_to_speech(input_text, qwen_output_file) + logging.info(f"Qwen TTS completed. 
Output saved to {qwen_output_file}") + + except Exception as e: + logging.error(f"An error occurred during text-to-speech conversion: {str(e)}") + raise + +# +# End of TTS Provider Check Functions +####################################################### + + +####################################################### +# +# Text-to-Speak Functions + +import logging +from typing import Callable, Dict, Any, List, Tuple + +from App_Function_Libraries.TTS.TTS_Providers import generate_audio_openai, generate_audio_edge, \ + generate_audio_elevenlabs, tts_providers + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Type aliases +AudioData = bytes +ProviderFunction = Callable[[str, str, str], AudioData] + +# Provider functions +# See TTS_Providers.py + +# Provider registry +PROVIDERS: Dict[str, ProviderFunction] = { + "elevenlabs": generate_audio_elevenlabs, + "openai": generate_audio_openai, + "edge": generate_audio_edge, +} + + +# Utility functions +def clean_text(text: str) -> str: + # Implement text cleaning logic + return text + + +def split_qa(text: str) -> List[Tuple[str, str]]: + # Implement Q&A splitting logic + return [] + + +def merge_audio(audio_segments: List[AudioData]) -> AudioData: + # Implement audio merging logic + return b"" + + +# Main Podcast generation function +def generate_tts_podcast( + text: str, + provider: str, + voice: str, + model: str, + config: Dict[str, Any] +) -> AudioData: + if provider not in PROVIDERS: + raise ValueError(f"Unsupported provider: {provider}") + + tts_func = PROVIDERS[provider] + + cleaned_text = clean_text(text) + qa_pairs = split_qa(cleaned_text) + + audio_segments = [] + for question, answer in qa_pairs: + q_audio = tts_func(question, config["question_voice"], model) + a_audio = tts_func(answer, config["answer_voice"], model) + audio_segments.extend([q_audio, a_audio]) + + return merge_audio(audio_segments) + + +# Configuration management +def load_tts_config() -> Dict[str, Any]: + # Implement configuration loading logic + return {} + + +# API Key management +def set_api_key(provider: str, api_key: str) -> None: + # Implement API key setting logic + pass + +tts_config = load_tts_config() + +def tts_generate_audio_single_speaker(input_text, provider, voice, model): + # if no input_text is passed, use a default text as a shorthand for validating X service works. + if not input_text: + input_text = "Hello, how are you? I'm doing well, thank you!" 
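+ # The branches below dispatch to the matching helper in TTS_Providers
+ # (generate_audio_elevenlabs / generate_audio_openai / generate_audio_edge);
+ # each is expected to return the synthesized audio as raw bytes, which this
+ # wrapper then writes to "output.mp3" before returning it to the caller.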
+ if provider not in tts_providers: + raise ValueError(f"Unsupported provider: {provider}") + elif provider == "elevenlabs": + result = generate_audio_elevenlabs(input_text, voice, model) + elif provider == "openai": + result = generate_audio_openai(input_text, voice, model) + elif provider == "edge": + result = generate_audio_edge(input_text, voice, model) + else: + raise ValueError(f"No provider found / Unsupported provider: {provider}") + # Save or process the result as needed + with open("output.mp3", "wb") as f: + f.write(result) + return result + +# +# End of Text-to-Speak Functions +####################################################### + + +####################################################### +# +# Podcast Creation Functions + + +# +# End of TTS.py +####################################################################################################################### + + +class TextToSpeech: + def __init__( + self, + model: str = None, + api_key: Optional[str] = None, + conversation_config: Optional[Dict[str, Any]] = None, + ): + """ + Initialize the TextToSpeech class. + + Args: + model (str): The model to use for text-to-speech conversion. + Options are 'elevenlabs', 'gemini', 'openai', 'edge' or 'geminimulti'. Defaults to 'openai'. + api_key (Optional[str]): API key for the selected text-to-speech service. + conversation_config (Optional[Dict]): Configuration for conversation settings. + """ + self.config = load_config() + self.conversation_config = load_conversation_config(conversation_config) + self.tts_config = self.conversation_config.get("text_to_speech", {}) + + # Get API key from config if not provided + if not api_key: + api_key = getattr(self.config, f"{model.upper().replace('MULTI', '')}_API_KEY", None) + + # Initialize provider using factory + self.provider = TTSProviderFactory.create( + provider_name=model, api_key=api_key, model=model + ) + + # Setup directories and config + self._setup_directories() + self.audio_format = self.tts_config.get("audio_format", "mp3") + self.ending_message = self.tts_config.get("ending_message", "") + + def _get_provider_config(self) -> Dict[str, Any]: + """Get provider-specific configuration.""" + # Get provider name in lowercase without 'TTS' suffix + provider_name = self.provider.__class__.__name__.lower().replace("tts", "") + + # Get provider config from tts_config + provider_config = self.tts_config.get(provider_name, {}) + + # If provider config is empty, try getting from default config + if not provider_config: + provider_config = { + "model": self.tts_config.get("default_model"), + "default_voices": { + "question": self.tts_config.get("default_voice_question"), + "answer": self.tts_config.get("default_voice_answer"), + }, + } + + logger.debug(f"Using provider config: {provider_config}") + return provider_config + + def convert_to_speech(self, text: str, output_file: str) -> None: + """ + Convert input text to speech and save as an audio file. + + Args: + text (str): Input text to convert to speech. + output_file (str): Path to save the output audio file. 
+ + Raises: + ValueError: If the input text is not properly formatted + """ + # Validate transcript format + # self._validate_transcript_format(text) + + cleaned_text = text + + try: + + if ( + "multi" in self.provider.model.lower() + ): # refactor: We should have instead MultiSpeakerTTS and SingleSpeakerTTS classes + provider_config = self._get_provider_config() + voice = provider_config.get("default_voices", {}).get("question") + voice2 = provider_config.get("default_voices", {}).get("answer") + model = provider_config.get("model") + audio_data_list = self.provider.generate_audio( + cleaned_text, + voice="S", + model="en-US-Studio-MultiSpeaker", + voice2="R", + ending_message=self.ending_message, + ) + + try: + # First verify we have data + if not audio_data_list: + raise ValueError("No audio data chunks provided") + + logger.info(f"Starting audio processing with {len(audio_data_list)} chunks") + combined = AudioSegment.empty() + + for i, chunk in enumerate(audio_data_list): + # Save chunk to temporary file + # temp_file = "./tmp.mp3" + # with open(temp_file, "wb") as f: + # f.write(chunk) + + segment = AudioSegment.from_file(io.BytesIO(chunk)) + logger.info(f"################### Loaded chunk {i}, duration: {len(segment)}ms") + + combined += segment + + # Export with high quality settings + os.makedirs(os.path.dirname(output_file), exist_ok=True) + combined.export( + output_file, + format=self.audio_format, + codec="libmp3lame", + bitrate="320k" + ) + + except Exception as e: + logger.error(f"Error during audio processing: {str(e)}") + raise + else: + with tempfile.TemporaryDirectory(dir=self.temp_audio_dir) as temp_dir: + audio_segments = self._generate_audio_segments( + cleaned_text, temp_dir + ) + self._merge_audio_files(audio_segments, output_file) + logger.info(f"Audio saved to {output_file}") + + except Exception as e: + logger.error(f"Error converting text to speech: {str(e)}") + raise + + def _generate_audio_segments(self, text: str, temp_dir: str) -> List[str]: + """Generate audio segments for each Q&A pair.""" + qa_pairs = self.provider.split_qa( + text, self.ending_message, self.provider.get_supported_tags() + ) + audio_files = [] + provider_config = self._get_provider_config() + + for idx, (question, answer) in enumerate(qa_pairs, 1): + for speaker_type, content in [("question", question), ("answer", answer)]: + temp_file = os.path.join( + temp_dir, f"{idx}_{speaker_type}.{self.audio_format}" + ) + voice = provider_config.get("default_voices", {}).get(speaker_type) + model = provider_config.get("model") + + audio_data = self.provider.generate_audio(content, voice, model) + with open(temp_file, "wb") as f: + f.write(audio_data) + audio_files.append(temp_file) + + return audio_files + + def _merge_audio_files(self, audio_files: List[str], output_file: str) -> None: + """ + Merge the provided audio files sequentially, ensuring questions come before answers. + + Args: + audio_files: List of paths to audio files to merge + output_file: Path to save the merged audio file + """ + try: + + def get_sort_key(file_path: str) -> Tuple[int, int]: + """ + Create sort key from filename that puts questions before answers. 
+ Example filenames: "1_question.mp3", "1_answer.mp3" + """ + basename = os.path.basename(file_path) + # Extract the index number and type (question/answer) + idx = int(basename.split("_")[0]) + is_answer = basename.split("_")[1].startswith("answer") + return ( + idx, + 1 if is_answer else 0, + ) # Questions (0) come before answers (1) + + # Sort files by index and type (question/answer) + audio_files.sort(key=get_sort_key) + + # Create empty audio segment + combined = AudioSegment.empty() + + # Add each audio file to the combined segment + for file_path in audio_files: + combined += AudioSegment.from_file(file_path, format=self.audio_format) + + # Ensure output directory exists + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + # Export the combined audio + combined.export(output_file, format=self.audio_format) + logger.info(f"Merged audio saved to {output_file}") + + except Exception as e: + logger.error(f"Error merging audio files: {str(e)}") + raise + + def _setup_directories(self) -> None: + """Setup required directories for audio processing.""" + self.output_directories = self.tts_config.get("output_directories", {}) + temp_dir = self.tts_config.get("temp_audio_dir", "data/audio/tmp/").rstrip("/").split("/") + self.temp_audio_dir = os.path.join(*temp_dir) + base_dir = os.path.abspath(os.path.dirname(__file__)) + self.temp_audio_dir = os.path.join(base_dir, self.temp_audio_dir) + + os.makedirs(self.temp_audio_dir, exist_ok=True) + + # Create directories if they don't exist + for dir_path in [ + self.output_directories.get("transcripts"), + self.output_directories.get("audio"), + self.temp_audio_dir, + ]: + if dir_path and not os.path.exists(dir_path): + os.makedirs(dir_path) + + def _validate_transcript_format(self, text: str) -> None: + """ + Validate that the input text follows the correct transcript format. + + Args: + text (str): Input text to validate + + Raises: + ValueError: If the text is not properly formatted + + The text should: + 1. Have alternating Person1 and Person2 tags + 2. Each opening tag should have a closing tag + 3. Tags should be properly nested + """ + try: + # Check for empty text + if not text.strip(): + raise ValueError("Input text is empty") + + # Check for matching opening and closing tags + person1_open = text.count("") + person1_close = text.count("") + person2_open = text.count("") + person2_close = text.count("") + + if person1_open != person1_close: + raise ValueError( + f"Mismatched Person1 tags: {person1_open} opening tags and {person1_close} closing tags" + ) + if person2_open != person2_close: + raise ValueError( + f"Mismatched Person2 tags: {person2_open} opening tags and {person2_close} closing tags" + ) + + # Check for alternating pattern using regex + pattern = r".*?\s*.*?" + matches = re.findall(pattern, text, re.DOTALL) + + # Calculate expected number of pairs + expected_pairs = min(person1_open, person2_open) + + if len(matches) != expected_pairs: + raise ValueError( + "Tags are not properly alternating between Person1 and Person2. " + "Each Person1 section should be followed by a Person2 section." 
+ ) + + # Check for malformed tags (unclosed or improperly nested) + stack = [] + for match in re.finditer(r"<(/?)Person([12])>", text): + tag = match.group(0) + if tag.startswith("= 400 + + # Save the audio response to a file + with open(output_file, "wb") as f: + f.write(response.content) + + print(f"Audio successfully generated and saved to {output_file}.") + return output_file + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Failed to generate audio: {str(e)}") from e + else: + try: + # Make the request to the API + response = requests.post(endpoint, headers=headers, json=payload) + response.raise_for_status() # Raise an error for HTTP status codes >= 400 + + # Save the audio response to a file + with open(output_file, "wb") as f: + f.write(response.content) + + print(f"Audio successfully generated and saved to {output_file}.") + return output_file + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Failed to generate audio: {str(e)}") from e + + +def test_generate_audio_openai(): + try: + logging.info("OpenAI: Attempting to use API key from config file") + api_key = loaded_config_data['openai_api']['api_key'] + + if not api_key: + logging.error("OpenAI: API key not found or is empty") + return "OpenAI: API Key Not Provided/Found in Config file or is empty" + + logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}") + except Exception as e: + logging.error(f"OpenAI: Error loading API Key: {str(e)}") + return f"OpenAI: Error loading API Key: {str(e)}" + + input_text = "The quick brown fox jumped over the lazy dog." + + voice = "alloy" + + model = "tts-1" + + try: + output_file = generate_audio_openai(api_key, input_text, voice, model) + print(f"Generated audio file: {output_file}") + play_mp3(output_file) + except Exception as e: + print(f"Error: {e}") + +# +# End of OpenAI TTS Provider Functions +####################################################### + + +####################################################### +# +# MS Azure TTS Provider Functions +# +#https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-azure-speech/README.md + +# +# End of MS Edge TTS Provider Functions +####################################################### + + +####################################################### +# +# ElvenLabs TTS Provider Functions +# FIXME - all of this + +# https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-elevenlabs/README.md +#https://elevenlabs.io/docs/api-reference/text-to-speech +def generate_audio_elevenlabs(input_text, voice, model=None, api_key=None): + """Generate audio using ElevenLabs API.""" + logging.info("Generating audio using ElevenLabs API.") + CHUNK_SIZE = 1024 + # API key validation + elevenlabs_api_key = api_key + try: + if not elevenlabs_api_key: + logging.info("ElevenLabs: API key not provided as parameter") + logging.info("ElevenLabs: Attempting to use API key from config file") + elevenlabs_api_key = loaded_config_data['elevenlabs_api']['api_key'] + + if not elevenlabs_api_key: + logging.error("ElevenLabs: API key not found or is empty") + return "ElevenLabs: API Key Not Provided/Found in Config file or is empty" + + logging.debug(f"ElevenLabs: Using API Key: {elevenlabs_api_key[:5]}...{elevenlabs_api_key[-5:]}") + except Exception as e: + logging.error(f"ElevenLabs: Error loading API Key: {str(e)}") + return f"ElevenLabs: Error loading API Key: {str(e)}" + + # Input data handling + try: 
+ if not input_text: + raise ValueError("Text input is required.") + logging.debug(f"ElevenLabs: Raw input data type: {type(input_text)}") + logging.debug(f"ElevenLabs: Raw input data (first 500 chars): {str(input_text)[:500]}...") + except Exception as e: + logging.error(f"ElevenLabs: Error loading input text: {str(e)}") + return f"ElevenLabs: Error loading input text: {str(e)}" + + # Handle Voice ID + try: + if not voice: + logging.info("ElevenLabs: Speaker ID(Voice) not provided as parameter") + logging.info("ElevenLabs: Attempting to use Speaker ID(Voice) from config file") + voice = loaded_config_data['tts_settings']['default_eleven_tts_voice'] + + if not voice: + raise ValueError("Voice is required. Default voice not found in config file and no voice selection was passed.") + except Exception as e: + logging.error(f"ElevenLabs: Error loading Speaker ID(Voice): {str(e)}") + return f"ElevenLabs: Error loading Speaker ID(Voice): {str(e)}" + + # Handle Model ID/Selection + try: + if not model: + logging.info("ElevenLabs: Model not provided as parameter") + logging.info("ElevenLabs: Attempting to use Model from config file") + model = loaded_config_data['tts_settings']['default_eleven_tts_model'] + + if not model: + raise ValueError("Model is required. Default model not found in config file and no model selection was passed.") + except Exception as e: + logging.error(f"ElevenLabs: Error Selecting Model: {str(e)}") + return f"ElevenLabs: Error Selecting Model: {str(e)}" + + # FIXME - add SSML tags + # Set the parameters for the TTS conversion + try: + # Stability + stability_str = loaded_config_data['tts_settings'].get('default_eleven_tts_voice_stability', '0.0') + default_eleven_tts_voice_stability = float(stability_str) if stability_str else 0.0 + + # Similarity Boost + similarity_boost_str = loaded_config_data['tts_settings'].get('default_eleven_tts_voice_similiarity_boost', '1.0') + default_eleven_tts_voice_similiarity_boost = float(similarity_boost_str) if similarity_boost_str else 1.0 + + # Style + style_str = loaded_config_data['tts_settings'].get('default_eleven_tts_voice_style', '0.0') + default_eleven_tts_voice_style = float(style_str) if style_str else 0.0 + + # Use Speaker Boost + use_speaker_boost_str = loaded_config_data['tts_settings'].get('default_eleven_tts_voice_use_speaker_boost', 'True') + default_eleven_tts_voice_use_speaker_boost = use_speaker_boost_str.lower() == 'true' if use_speaker_boost_str else True + + # Output Format + default_eleven_tts_output_format = loaded_config_data['tts_settings'].get('default_eleven_tts_output_format', 'mp3_44100_192') + except Exception as e: + logging.error(f"ElevenLabs: Error loading voice settings: {str(e)}") + return f"ElevenLabs: Error loading voice settings: {str(e)}" + + # Make the API request + tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}/stream?output_format={default_eleven_tts_output_format}" + + # Set up headers for the API request, including the API key for authentication + headers = { + "Accept": "application/json", + "xi-api-key": elevenlabs_api_key + } + + # Set up the data payload for the API request, including the text and voice settings + data = { + "text": input_text, + "model_id": model, + "output_format": default_eleven_tts_output_format, + "voice_settings": { + "stability": default_eleven_tts_voice_stability, + "similarity_boost": default_eleven_tts_voice_similiarity_boost, + "style": default_eleven_tts_voice_style, + "use_speaker_boost": default_eleven_tts_voice_use_speaker_boost + } + } + + 
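The four ElevenLabs voice settings above come out of config.txt as strings and are coerced to float or bool one field at a time. The helpers below are only an illustrative consolidation of that repeated pattern, not part of this patch, and the function names are hypothetical.

    # Hypothetical helpers (not in this patch) for the string coercion repeated above
    # for stability, similarity boost, style and speaker boost.
    def _tts_float_setting(settings: dict, key: str, default: float) -> float:
        """Return a float config value, falling back to `default` on a missing or bad entry."""
        raw = settings.get(key, str(default))
        try:
            return float(raw) if raw else default
        except (TypeError, ValueError):
            return default

    def _tts_bool_setting(settings: dict, key: str, default: bool = True) -> bool:
        """Return a boolean config value stored as the string 'True'/'False'."""
        raw = settings.get(key, str(default))
        return raw.lower() == 'true' if isinstance(raw, str) else bool(raw)

    # Example usage against the same config section the code above reads:
    # stability = _tts_float_setting(loaded_config_data['tts_settings'], 'default_eleven_tts_voice_stability', 0.0)
    # boost = _tts_bool_setting(loaded_config_data['tts_settings'], 'default_eleven_tts_voice_use_speaker_boost', True)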
try: + # Make the POST request to the TTS API with headers and data, enabling streaming response + with requests.post(tts_url, headers=headers, json=data, stream=True) as response: + # Check if the request was successful + if response.ok: + # Create temp file but don't use context manager + tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") + for chunk in response.iter_content(chunk_size=CHUNK_SIZE): + tmp_file.write(chunk) + tmp_file.flush() + tmp_file.close() # Explicitly close the file handle + temp_file_path = tmp_file.name + print(f"Audio stream saved successfully to {temp_file_path}.") + return temp_file_path + else: + logging.error(f"API request failed: {response.status_code} - {response.text}") + return f"API request failed: {response.status_code} - {response.text}" + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Failed to generate audio: {str(e)}") from e + + +def test_generate_audio_elevenlabs_real_request(): + """Test the function with a real API request.""" + api_key = None + input_text = "This is a test text for generating audio." + voice = None + model = "eleven_turbo_v2" + + # Call the function + result = generate_audio_elevenlabs(input_text=input_text, voice=voice, model=model, api_key=api_key) + + # Assertions + assert os.path.exists(result), f"The file {result} should exist." + assert result.endswith(".mp3"), f"The file {result} should be an MP3 file." + + print(f"Attempting to play file: {result}") + if os.path.exists(result): + play_mp3(result) # Single play call + + +def test_generate_audio_elevenlabs_invalid_api_key(): + """Test the function with an invalid API key.""" + # Use an invalid API key + api_key = "invalid_api_key" + input_text = "This is a test text for generating audio." + voice = "your_voice_id" # Replace with a valid voice ID from ElevenLabs + + # Call the function + result = generate_audio_elevenlabs(input_text=input_text, voice=voice, api_key=api_key) + + # Assertions + assert "API request failed" in result, "The function should return an error message for an invalid API key." + +def test_generate_audio_elevenlabs_missing_input_text(): + """Test the function with missing input text.""" + # Use a valid API key but no input text + api_key = "your_actual_api_key" + input_text = "" + voice = "your_voice_id" # Replace with a valid voice ID from ElevenLabs + + # Call the function + result = generate_audio_elevenlabs(input_text=input_text, voice=voice, api_key=api_key) + + # Assertions + assert "Error loading input text" in result, "The function should return an error message for missing input text." + +# End of ElvenLabs TTS Provider Functions +####################################################### + + +####################################################### +# +# Google Gemini TTS Provider Functions + +# https://github.com/google-gemini/cookbook/blob/main/quickstarts/Audio.ipynb +# Fuck google. lets wait for their docs to not be complete fucking shit. 
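The ElevenLabs tests above exercise the live endpoint and depend on a real key being available. A mocked variant along the following lines avoids network access entirely; it is a sketch that assumes generate_audio_elevenlabs is importable from the same module and that config.txt loads (the function still reads its tts_settings defaults). The voice, model, and key values are placeholders.

    from unittest.mock import MagicMock, patch

    def test_generate_audio_elevenlabs_mocked_request():
        """Fake the streaming HTTP call so the ElevenLabs path can run in CI without a real key."""
        fake_response = MagicMock()
        fake_response.ok = True
        fake_response.iter_content.return_value = [b"fake-mp3-bytes"]
        # requests.post(...) is used as a context manager, so __enter__ must hand back the response
        fake_response.__enter__.return_value = fake_response
        fake_response.__exit__.return_value = False

        with patch("requests.post", return_value=fake_response):
            result = generate_audio_elevenlabs(
                input_text="Mocked test input.",
                voice="mock_voice_id",
                model="eleven_turbo_v2",
                api_key="mock_api_key",
            )

        assert isinstance(result, str) and result.endswith(".mp3"), "A temporary .mp3 path should be returned."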
+
+#
+# End of Google Gemini TTS Provider Functions
+#######################################################
+
+
+############################################################ LOCAL #####################################################
+
+
+#######################################################
+#
+# AllTalk TTS Provider Functions
+# https://github.com/erew123/alltalk_tts
+# https://github.com/erew123/alltalk_tts/wiki/API-%E2%80%90-OpenAI-V1-Speech-Compatible-Endpoint
+
+def generate_audio_alltalk(input_text, voice=None, model=None, response_format=None, speed=None):
+ """Generate audio using AllTalk API.
+
+ Args:
+ input_text (str): Text to convert to speech (max 4096 chars)
+ voice (str, optional): Voice ID ('alloy', 'echo', 'fable', 'nova', 'onyx', 'shimmer')
+ model (str, optional): Model ID (placeholder)
+ response_format (str, optional): Audio format (defaults to 'wav')
+ speed (float, optional): Speech speed (0.25 to 4.0, defaults to 1.0)
+
+ Returns:
+ str: Path to the generated audio file
+ """
+
+ # Input validation
+ try:
+ if not input_text:
+ raise ValueError("Text input is required.")
+ logging.debug(f"AllTalk: Raw input data type: {type(input_text)}")
+ logging.debug(f"AllTalk: Raw input data (first 500 chars): {str(input_text)[:500]}...")
+ except Exception as e:
+ logging.error(f"AllTalk: Error loading input text: {str(e)}")
+ return f"AllTalk: Error loading input text: {str(e)}"
+ try:
+ if len(input_text) > 4096:
+ raise ValueError("Text input must be less than 4096 characters.")
+ except Exception as e:
+ logging.error(f"AllTalk: Error loading input text(more than 4096 characters): {str(e)}")
+ return f"AllTalk: Error loading input text(more than 4096 characters): {str(e)}"
+
+ # Handle Voice
+ try:
+ if not voice:
+ logging.info("AllTalk: Voice not provided as parameter")
+ logging.info("AllTalk: Attempting to use voice from config file")
+ voice = loaded_config_data['alltalk_api']['default_alltalk_tts_voice']
+
+ if not voice:
+ raise ValueError("Voice is required. Default voice not found in config file and no voice selection was passed.")
+ except Exception as e:
+ logging.error(f"AllTalk: Error loading voice: {str(e)}")
+ return f"AllTalk: Error loading voice: {str(e)}"
+
+ # Handle Response Format
+ try:
+ if not response_format:
+ logging.info("AllTalk: Format not provided as parameter")
+ logging.info("AllTalk: Attempting to use format from config file")
+ response_format = loaded_config_data['alltalk_api']['default_alltalk_tts_output_format']
+
+ if not response_format:
+ logging.debug("AllTalk: No response format provided. Defaulting to 'wav'")
+ response_format = "wav"
+ except Exception as e:
+ logging.error(f"AllTalk: Error setting format: {str(e)}")
+ return f"AllTalk: Error setting format: {str(e)}"
+
+ # Handle Speed
+ try:
+ if not speed:
+ logging.info("AllTalk: Speed not provided as parameter")
+ logging.info("AllTalk: Attempting to use speed from config file")
+ speed = loaded_config_data['alltalk_api']['default_alltalk_tts_speed']
+
+ if not speed:
+ logging.debug("AllTalk: No speed provided. 
Defaulting to '1.0'") + speed = 1.0 + + speed = float(speed) + if not 0.25 <= speed <= 4.0: + raise ValueError("Speed must be between 0.25 and 4.0") + except Exception as e: + logging.error(f"AllTalk: Error setting speed: {str(e)}") + return f"AllTalk: Error setting speed: {str(e)}" + + # API URL + try: + alltalk_api_url = loaded_config_data['alltalk_api']['api_ip'] + if not alltalk_api_url: + raise ValueError("API URL not found in config") + except Exception as e: + logging.error(f"AllTalk: Error loading API URL: {str(e)}") + return f"AllTalk: Error loading API URL: {str(e)}" + + # Prepare request + payload = { + "model": model, + "input": input_text, + "voice": voice, + "response_format": response_format, + "speed": speed + } + + headers = { + "Content-Type": "application/json" + } + + try: + # Make the API request without streaming + response = requests.post(alltalk_api_url, json=payload, headers=headers) + + if response.ok: + # Create a temporary file + with tempfile.NamedTemporaryFile(delete=False, + suffix=f".{response_format}") as tmp_file: + # Write the entire response content at once + tmp_file.write(response.content) + tmp_file.flush() + temp_file_path = tmp_file.name + + print(f"Audio stream saved successfully to {temp_file_path}.") + return temp_file_path + else: + error_msg = f"API request failed: {response.status_code} - {response.text}" + logging.error(error_msg) + return error_msg + + except requests.exceptions.RequestException as e: + error_msg = f"Failed to generate audio: {str(e)}" + logging.error(error_msg) + return error_msg + + +def test_generate_audio_alltalk(): + model = "placeholder" + input_text = "The quick brown fox jumped over the yellow lazy dog." + voice = "alloy" + response_format = "wav" + speed = 1.0 + + generate_audio_alltalk(model, input_text, voice, response_format, speed) + +# +# End of AllTalk TTS Provider Functions +####################################################### + + +####################################################### +# +# Piper TTS Provider Functions +# https://github.com/rhasspy/piper +# https://github.com/erew123/alltalk_tts/wiki/API-%E2%80%90-OpenAI-V1-Speech-Compatible-Endpoint + +def generate_audio_piper(input_text, voice=None, model=None, response_format=None, speed=None): + """Generate audio using Piper TTS. + + Args: + + Returns: + str: Path to the generated audio file + """ + + # Input validation + pass + +# +# End of Piper TTS Provider Functions +####################################################### + + +####################################################### +# +# Vevo TTS Provider Functions +# +# https://github.com/open-mmlab/Amphion +# https://huggingface.co/amphion/Vevo + +def generate_audio_vevo(input_text, voice=None, model=None, response_format=None, speed=None): + """Generate audio using Piper TTS. 
+ + Args: + + Returns: + str: Path to the generated audio file + """ + + # Input validation + pass + +# +# End of Vevo TTS Provider Functions +####################################################### + + +####################################################### +# +# gpt-soviTTS TTS Provider Functions +# https://github.com/RVC-Boss/GPT-SoVITS + +# +# End of gpt-soviTTS TTS Provider Functions +####################################################### + +# +# End of TTS_Providers.py +####################################################################################################################### diff --git a/App_Function_Libraries/TTS/__init__.py b/App_Function_Libraries/TTS/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App_Function_Libraries/Third_Party/Note_Services.py b/App_Function_Libraries/Third_Party/Note_Services.py new file mode 100644 index 000000000..6ad759a77 --- /dev/null +++ b/App_Function_Libraries/Third_Party/Note_Services.py @@ -0,0 +1,24 @@ +# Note_Services.py +# Description: This file contains the functions that are used for interacting with various Note-Taking Services +# +# Imports +import requests +# +# 3rd-Party Imports +# +# Local Imports +# +####################################################################################################################### +# +# Functions: + +###################### Notion API ###################### +# https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-notion/README.md + +# +# End Notion API +###################### End Notion API ###################### + +# +# End of Note_Services.py +####################################################################################################################### diff --git a/App_Function_Libraries/Third_Party/PubMedCentral.py b/App_Function_Libraries/Third_Party/PubMedCentral.py new file mode 100644 index 000000000..c0e5f169d --- /dev/null +++ b/App_Function_Libraries/Third_Party/PubMedCentral.py @@ -0,0 +1,19 @@ +# PubMedCentral.py +# Description: This file contains the functions that are used for performing queries against the PubMedCentral API +# +# Imports +# +# 3rd-Party Imports +# +# Local Imports +# +######################################################################################################################## +# +# Functions: + +def search_pubmedcentral(): + pass + +# +# End of PubMedCentral.py +######################################################################################################################## diff --git a/App_Function_Libraries/Third_Party/Slack.py b/App_Function_Libraries/Third_Party/Slack.py new file mode 100644 index 000000000..ea8f3c580 --- /dev/null +++ b/App_Function_Libraries/Third_Party/Slack.py @@ -0,0 +1,20 @@ +# Slack.py +# Description: This file contains the functions that are used for interacting with Slack API +# +# Imports +import requests +# +# 3rd-Party Imports +# +# Local Imports +# +####################################################################################################################### +# +# Functions: + +# https://api.slack.com/methods/conversations.history +# https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-slack/README.md + +# +# End of Slack.py +####################################################################################################################### diff --git a/App_Function_Libraries/Utils/Utils.py b/App_Function_Libraries/Utils/Utils.py index 
06f1fa773..3c4354d2d 100644 --- a/App_Function_Libraries/Utils/Utils.py +++ b/App_Function_Libraries/Utils/Utils.py @@ -219,7 +219,11 @@ def load_and_log_configs(): google_api_key = config.get('API', 'google_api_key', fallback=None) logging.debug( - f"Loaded Mistral API Key: {google_api_key[:5]}...{google_api_key[-5:] if google_api_key else None}") + f"Loaded Google API Key: {google_api_key[:5]}...{google_api_key[-5:] if google_api_key else None}") + + elevenlabs_api_key = config.get('API', 'elevenlabs_api_key', fallback=None) + logging.debug( + f"Loaded elevenlabs API Key: {elevenlabs_api_key[:5]}...{elevenlabs_api_key[-5:] if elevenlabs_api_key else None}") # Models anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229') @@ -232,6 +236,42 @@ def load_and_log_configs(): mistral_model = config.get('API', 'mistral_model', fallback='mistral-large-latest') google_model = config.get('API', 'google_model', fallback='gemini-1.5-pro') + # LLM API Settings - streaming / temperature / top_p / min_p + anthropic_streaming = config.get('API', 'anthropic_streaming', fallback='False') + anthropic_temperature = config.get('API', 'anthropic_temperature', fallback='0.7') + anthropic_top_p = config.get('API', 'anthropic_top_p', fallback='0.95') + anthropic_min_p = config.get('API', 'anthropic_min_p', fallback='0.05') + cohere_streaming = config.get('API', 'cohere_streaming', fallback='False') + cohere_temperature = config.get('API', 'cohere_temperature', fallback='0.7') + cohere_min_p = config.get('API', 'cohere_min_p', fallback='0.05') + groq_streaming = config.get('API', 'groq_streaming', fallback='False') + groq_temperature = config.get('API', 'groq_temperature', fallback='0.7') + groq_top_p = config.get('API', 'groq_top_p', fallback='0.95') + groq_min_p = config.get('API', 'groq_min_p', fallback='0.05') + openai_streaming = config.get('API', 'openai_streaming', fallback='False') + openai_temperature = config.get('API', 'openai_temperature', fallback='0.7') + openai_top_p = config.get('API', 'openai_top_p', fallback='0.95') + huggingface_streaming = config.get('API', 'huggingface_streaming', fallback='False') + huggingface_temperature = config.get('API', 'huggingface_temperature', fallback='0.7') + huggingface_top_p = config.get('API', 'huggingface_top_p', fallback='0.95') + huggingface_min_p = config.get('API', 'huggingface_min_p', fallback='0.05') + openrouter_streaming = config.get('API', 'openrouter_streaming', fallback='False') + openrouter_temperature = config.get('API', 'openrouter_temperature', fallback='0.7') + openrouter_top_p = config.get('API', 'openrouter_top_p', fallback='0.95') + openrouter_min_p = config.get('API', 'openrouter_min_p', fallback='0.05') + deepseek_streaming = config.get('API', 'deepseek_streaming', fallback='False') + deepseek_temperature = config.get('API', 'deepseek_temperature', fallback='0.7') + deepseek_top_p = config.get('API', 'deepseek_top_p', fallback='0.95') + deepseek_min_p = config.get('API', 'deepseek_min_p', fallback='0.05') + mistral_streaming = config.get('API', 'mistral_streaming', fallback='False') + mistral_temperature = config.get('API', 'mistral_temperature', fallback='0.7') + mistral_top_p = config.get('API', 'mistral_top_p', fallback='0.95') + mistral_min_p = config.get('API', 'mistral_min_p', fallback='0.05') + google_streaming = config.get('API', 'google_streaming', fallback='False') + google_temperature = config.get('API', 'google_temperature', fallback='0.7') + google_top_p = config.get('API', 'google_top_p', 
fallback='0.95') + google_min_p = config.get('API', 'google_min_p', fallback='0.05') + logging.debug(f"Loaded Anthropic Model: {anthropic_model}") logging.debug(f"Loaded Cohere Model: {cohere_model}") logging.debug(f"Loaded Groq Model: {groq_model}") @@ -245,12 +285,25 @@ def load_and_log_configs(): kobold_api_ip = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') kobold_openai_api_IP = config.get('Local-API', 'kobold_openai_api_IP', fallback='http://127.0.0.1:5001/v1/chat/completions') kobold_api_key = config.get('Local-API', 'kobold_api_key', fallback='') + kobold_streaming = config.get('Local-API', 'kobold_streaming', fallback='False') + kobold_temperature = config.get('Local-API', 'kobold_temperature', fallback='0.7') + kobold_top_p = config.get('Local-API', 'kobold_top_p', fallback='0.95') + kobold_min_p = config.get('Local-API', 'kobold_min_p', fallback='0.05') + llama_api_IP = config.get('Local-API', 'llama_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions') llama_api_key = config.get('Local-API', 'llama_api_key', fallback='') + llama_streaming = config.get('Local-API', 'llama_streaming', fallback='False') + llama_temperature = config.get('Local-API', 'llama_temperature', fallback='0.7') + llama_top_p = config.get('Local-API', 'llama_top_p', fallback='0.95') + llama_min_p = config.get('Local-API', 'llama_min_p', fallback='0.05') ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/v1/chat/completions') ooba_api_key = config.get('Local-API', 'ooba_api_key', fallback='') + ooba_streaming = config.get('Local-API', 'ooba_streaming', fallback='False') + ooba_temperature = config.get('Local-API', 'ooba_temperature', fallback='0.7') + ooba_top_p = config.get('Local-API', 'ooba_top_p', fallback='0.95') + ooba_min_p = config.get('Local-API', 'ooba_min_p', fallback='0.05') tabby_api_IP = config.get('Local-API', 'tabby_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') tabby_api_key = config.get('Local-API', 'tabby_api_key', fallback=None) @@ -266,11 +319,14 @@ def load_and_log_configs(): aphrodite_api_url = config.get('Local-API', 'aphrodite_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions') aphrodite_api_key = config.get('Local-API', 'aphrodite_api_key', fallback='') + aphrodite_model = config.get('Local-API', 'aphrodite_model', fallback='') custom_openai_api_key = config.get('API', 'custom_openai_api_key', fallback=None) custom_openai_api_url = config.get('API', 'custom_openai_url', fallback=None) logging.debug( f"Loaded Custom openai-like endpoint API Key: {custom_openai_api_key[:5]}...{custom_openai_api_key[-5:] if custom_openai_api_key else None}") + custom_openai_api_streaming = config.get('API', 'custom_openai_streaming', fallback='False') + custom_openai_api_temperature = config.get('API', 'custom_openai_temperature', fallback='0.7') logging.debug(f"Loaded Kobold API IP: {kobold_api_ip}") logging.debug(f"Loaded Llama API IP: {llama_api_IP}") @@ -281,6 +337,10 @@ def load_and_log_configs(): # Retrieve default API choices from the configuration file default_api = config.get('API', 'default_api', fallback='openai') + # Retrieve LLM API settings from the configuration file + local_api_retries = config.get('Local-API', 'Settings', fallback='3') + local_api_retry_delay = config.get('Local-API', 'local_api_retry_delay', fallback='5') + # Retrieve output paths from the configuration file output_path = config.get('Paths', 'output_path', fallback='results') logging.debug(f"Output path set to: 
{output_path}") @@ -311,51 +371,234 @@ def load_and_log_configs(): # Local API Timeout local_api_timeout = config.get('Local-API', 'local_api_timeout', fallback='90') + # TTS Settings + # FIXME + default_tts_provider = config.get('TTS-Settings', 'default_tts_provider', fallback='openai') + tts_voice = config.get('TTS-Settings', 'default_tts_voice', fallback='shimmer') + # Open AI TTS + default_openai_tts_model = config.get('TTS-Settings', 'default_openai_tts_model', fallback='tts-1-hd') + default_openai_tts_voice = config.get('TTS-Settings', 'default_openai_tts_voice', fallback='shimmer') + default_openai_tts_speed = config.get('TTS-Settings', 'default_openai_tts_speed', fallback='1') + default_openai_tts_output_format = config.get('TTS-Settings', 'default_openai_tts_output_format', fallback='mp3') + # Google TTS + # FIXME - FIX THESE DEFAULTS + default_google_tts_model = config.get('TTS-Settings', 'default_google_tts_model', fallback='en') + default_google_tts_voice = config.get('TTS-Settings', 'default_google_tts_voice', fallback='en') + default_google_tts_speed = config.get('TTS-Settings', 'default_google_tts_speed', fallback='1') + # ElevenLabs TTS + default_eleven_tts_model = config.get('TTS-Settings', 'default_eleven_tts_model', fallback='FIXME') + default_eleven_tts_voice = config.get('TTS-Settings', 'default_eleven_tts_voice', fallback='FIXME') + default_eleven_tts_language_code = config.get('TTS-Settings', 'default_eleven_tts_language_code', fallback='FIXME') + default_eleven_tts_voice_stability = config.get('TTS-Settings', 'default_eleven_tts_voice_stability', fallback='FIXME') + default_eleven_tts_voice_similiarity_boost = config.get('TTS-Settings', 'default_eleven_tts_voice_similiarity_boost', fallback='FIXME') + default_eleven_tts_voice_style = config.get('TTS-Settings', 'default_eleven_tts_voice_style', fallback='FIXME') + default_eleven_tts_voice_use_speaker_boost = config.get('TTS-Settings', 'default_eleven_tts_voice_use_speaker_boost', fallback='FIXME') + default_eleven_tts_output_format = config.get('TTS-Settings', 'default_eleven_tts_output_format', + fallback='mp3_44100_192') + # AllTalk TTS + alltalk_api_ip = config.get('TTS-Settings', 'alltalk_api_ip', fallback='http://127.0.0.1:7851/v1/audio/speech') + default_alltalk_tts_model = config.get('TTS-Settings', 'default_alltalk_tts_model', fallback='alltalk_model') + default_alltalk_tts_voice = config.get('TTS-Settings', 'default_alltalk_tts_voice', fallback='alloy') + default_alltalk_tts_speed = config.get('TTS-Settings', 'default_alltalk_tts_speed', fallback=1.0) + default_alltalk_tts_output_format = config.get('TTS-Settings', 'default_alltalk_tts_output_format', fallback='mp3') + + # Search Engines + search_provider_default = config.get('Search-Engines', 'search_provider_default', fallback='google') + search_language_query = config.get('Search-Engines', 'search_language_query', fallback='en') + search_language_results = config.get('Search-Engines', 'search_language_results', fallback='en') + search_language_analysis = config.get('Search-Engines', 'search_language_analysis', fallback='en') + search_default_max_queries = 10 + search_enable_subquery = config.get('Search-Engines', 'search_enable_subquery', fallback='True') + search_enable_subquery_count_max = config.get('Search-Engines', 'search_enable_subquery_count_max', fallback=5) + search_result_rerank = config.get('Search-Engines', 'search_result_rerank', fallback='True') + search_result_max = config.get('Search-Engines', 'search_result_max', fallback=10) + 
search_result_max_per_query = config.get('Search-Engines', 'search_result_max_per_query', fallback=10) + search_result_blacklist = config.get('Search-Engines', 'search_result_blacklist', fallback='') + search_result_display_type = config.get('Search-Engines', 'search_result_display_type', fallback='list') + search_result_display_metadata = config.get('Search-Engines', 'search_result_display_metadata', fallback='False') + search_result_save_to_db = config.get('Search-Engines', 'search_result_save_to_db', fallback='True') + search_result_analysis_tone = config.get('Search-Engines', 'search_result_analysis_tone', fallback='') + relevance_analysis_llm = config.get('Search-Engines', 'relevance_analysis_llm', fallback='False') + final_answer_llm = config.get('Search-Engines', 'final_answer_llm', fallback='False') + # Search Engine Specifics + baidu_search_api_key = config.get('Search-Engines', 'search_engine_api_key_baidu', fallback='') + # Bing Search Settings + bing_search_api_key = config.get('Search-Engines', 'search_engine_api_key_bing', fallback='') + bing_country_code = config.get('Search-Engines', 'search_engine_country_code_bing', fallback='us') + bing_search_api_url = config.get('Search-Engines', 'search_engine_api_url_bing', fallback='') + # Brave Search Settings + brave_search_api_key = config.get('Search-Engines', 'search_engine_api_key_brave_regular', fallback='') + brave_search_ai_api_key = config.get('Search-Engines', 'search_engine_api_key_brave_ai', fallback='') + brave_country_code = config.get('Search-Engines', 'search_engine_country_code_brave', fallback='us') + # DuckDuckGo Search Settings + duckduckgo_search_api_key = config.get('Search-Engines', 'search_engine_api_key_duckduckgo', fallback='') + # Google Search Settings + google_search_api_url = config.get('Search-Engines', 'search_engine_api_url_google', fallback='') + google_search_api_key = config.get('Search-Engines', 'search_engine_api_key_google', fallback='') + google_search_engine_id = config.get('Search-Engines', 'search_engine_id_google', fallback='') + google_simp_trad_chinese = config.get('Search-Engines', 'enable_traditional_chinese', fallback='0') + limit_google_search_to_country = config.get('Search-Engines', 'limit_google_search_to_country', fallback='0') + google_search_country = config.get('Search-Engines', 'google_search_country', fallback='us') + google_search_country_code = config.get('Search-Engines', 'google_search_country_code', fallback='us') + google_filter_setting = config.get('Search-Engines', 'google_filter_setting', fallback='1') + google_user_geolocation = config.get('Search-Engines', 'google_user_geolocation', fallback='') + google_ui_language = config.get('Search-Engines', 'google_ui_language', fallback='en') + google_limit_search_results_to_language = config.get('Search-Engines', 'google_limit_search_results_to_language', fallback='') + google_default_search_results = config.get('Search-Engines', 'google_default_search_results', fallback='10') + google_safe_search = config.get('Search-Engines', 'google_safe_search', fallback='active') + google_enable_site_search = config.get('Search-Engines', 'google_enable_site_search', fallback='0') + google_site_search_include = config.get('Search-Engines', 'google_site_search_include', fallback='') + google_site_search_exclude = config.get('Search-Engines', 'google_site_search_exclude', fallback='') + google_sort_results_by = config.get('Search-Engines', 'google_sort_results_by', fallback='relevance') + # Kagi Search Settings + kagi_search_api_key = 
config.get('Search-Engines', 'search_engine_api_key_kagi', fallback='') + # Searx Search Settings + search_engine_searx_api = config.get('Search-Engines', 'search_engine_searx_api', fallback='') + # Tavily Search Settings + tavily_search_api_key = config.get('Search-Engines', 'search_engine_api_key_tavily', fallback='') + # Yandex Search Settings + yandex_search_api_key = config.get('Search-Engines', 'search_engine_api_key_yandex', fallback='') + yandex_search_engine_id = config.get('Search-Engines', 'search_engine_id_yandex', fallback='') + + # Prompts + sub_question_generation_prompt = config.get('Prompts', 'sub_question_generation_prompt', fallback='') + search_result_relevance_eval_prompt = config.get('Prompts', 'search_result_relevance_eval_prompt', fallback='') + analyze_search_results_prompt = config.get('Prompts', 'analyze_search_results_prompt', fallback='') + return { - 'api_keys': { - 'anthropic': anthropic_api_key, - 'cohere': cohere_api_key, - 'groq': groq_api_key, - 'openai': openai_api_key, - 'huggingface': huggingface_api_key, - 'openrouter': openrouter_api_key, - 'deepseek': deepseek_api_key, - 'mistral': mistral_api_key, - 'google': google_api_key, - 'kobold': kobold_api_key, - 'llama': llama_api_key, - 'ooba': ooba_api_key, - 'tabby': tabby_api_key, - 'vllm': vllm_api_key, - 'ollama': ollama_api_key, - 'aphrodite': aphrodite_api_key, - 'custom_openai_api_key': custom_openai_api_key + 'anthropic_api': { + 'api_key': anthropic_api_key, + 'model': anthropic_model, + 'streaming': anthropic_streaming, + 'temperature': anthropic_temperature, + 'top_p': anthropic_top_p, + 'min_p': anthropic_min_p }, - 'models': { - 'anthropic': anthropic_model, - 'cohere': cohere_model, - 'groq': groq_model, - 'openai': openai_model, - 'huggingface': huggingface_model, - 'openrouter': openrouter_model, - 'deepseek': deepseek_model, - 'mistral': mistral_model, - 'google': google_model, - 'vllm': vllm_model, - 'tabby': tabby_model, - 'ollama': ollama_model - + 'cohere_api': { + 'api_key': cohere_api_key, + 'model': cohere_model, + 'streaming': cohere_streaming, + 'temperature': cohere_temperature, + 'min_p': cohere_min_p + }, + 'deepseek_api': { + 'api_key': deepseek_api_key, + 'model': deepseek_model, + 'streaming': deepseek_streaming, + 'temperature': deepseek_temperature, + 'top_p': deepseek_top_p, + 'min_p': deepseek_min_p + }, + 'google_api': { + 'api_key': google_api_key, + 'model': google_model, + 'streaming': google_streaming, + 'temperature': google_temperature, + 'top_p': google_top_p, + 'min_p': google_min_p + }, + 'groq_api': { + 'api_key': groq_api_key, + 'model': groq_model, + 'streaming': groq_streaming, + 'temperature': groq_temperature, + 'top_p': groq_top_p, + 'min_p': groq_min_p + }, + 'huggingface_api': { + 'api_key': huggingface_api_key, + 'model': huggingface_model, + 'streaming': huggingface_streaming, + }, + 'mistral_api': { + 'api_key': mistral_api_key, + 'model': mistral_model, + 'streaming': mistral_streaming, + 'temperature': mistral_temperature, + 'top_p': mistral_top_p, + 'min_p': mistral_min_p + }, + 'openrouter_api': { + 'api_key': openrouter_api_key, + 'model': openrouter_model, + 'streaming': openrouter_streaming, + 'temperature': openrouter_temperature, + 'top_p': openrouter_top_p, + 'min_p': openrouter_min_p + }, + 'openai_api': { + 'api_key': openai_api_key, + 'model': openai_model, + 'streaming': openai_streaming, + 'temperature': openai_temperature, + 'top_p': openai_top_p, + }, + 'elevenlabs_api': { + 'api_key': elevenlabs_api_key + }, + 'alltalk_api': { + 
'api_ip': alltalk_api_ip, + 'default_alltalk_tts_model': default_alltalk_tts_model, + 'default_alltalk_tts_voice': default_alltalk_tts_voice, + 'default_alltalk_tts_speed': default_alltalk_tts_speed, + 'default_alltalk_tts_output_format': default_alltalk_tts_output_format, + }, + 'custom_openai_api': { + 'api_key': custom_openai_api_key, + 'api_url': custom_openai_api_url, + 'streaming': custom_openai_api_streaming, + 'temperature': custom_openai_api_temperature, + }, + 'llama_api': { + 'api_ip': llama_api_IP, + 'api_key': llama_api_key, + 'streaming': llama_streaming, + 'temperature': llama_temperature, + 'top_p': llama_top_p, + 'min_p': llama_min_p + }, + 'ooba_api': { + 'api_ip': ooba_api_IP, + 'api_key': ooba_api_key, + 'streaming': ooba_streaming, + 'temperature': ooba_temperature, + 'top_p': ooba_top_p, + 'min_p': ooba_min_p + }, + 'kobold_api': { + 'api_ip': kobold_api_ip, + 'api_streaming_ip': kobold_openai_api_IP, + 'api_key': kobold_api_key, + 'streaming': kobold_streaming, + 'temperature': kobold_temperature, + 'top_p': kobold_top_p, + 'min_p': kobold_min_p + }, + 'tabby_api': { + 'api_ip': tabby_api_IP, + 'api_key': tabby_api_key, + 'model': tabby_model + }, + 'vllm_api': { + 'api_url': vllm_api_url, + 'api_key': vllm_api_key, + 'model': vllm_model + }, + 'ollama_api': { + 'api_url': ollama_api_url, + 'api_key': ollama_api_key, + 'model': ollama_model }, - 'local_api_ip': { - 'kobold': kobold_api_ip, - 'kobold_openai': kobold_openai_api_IP, - 'llama': llama_api_IP, - 'ooba': ooba_api_IP, - 'tabby': tabby_api_IP, - 'vllm': vllm_api_url, - 'ollama': ollama_api_url, - 'aphrodite': aphrodite_api_url, - 'custom_openai_api_ip': custom_openai_api_url + 'aphrodite_api': { + 'api_url': aphrodite_api_url, + 'api_key': aphrodite_api_key, + 'model': aphrodite_model, + }, + 'llm_api_settings': { + 'default_api': default_api, + 'local_api_timeout': local_api_timeout, + 'local_api_retries': local_api_retries, + 'local_api_retry_delay': local_api_retry_delay, }, 'output_path': output_path, 'processing_choice': processing_choice, @@ -382,30 +625,121 @@ def load_and_log_configs(): 'save_rag_chats': save_rag_chats, }, 'default_api': default_api, - 'local_api_timeout': local_api_timeout + 'local_api_timeout': local_api_timeout, + 'tts_settings': { + 'default_tts_provider': default_tts_provider, + 'tts_voice': tts_voice, + # OpenAI + 'default_openai_tts_voice': default_openai_tts_voice, + 'default_openai_tts_speed': default_openai_tts_speed, + 'default_openai_tts_model': default_openai_tts_model, + 'default_openai_tts_output_format': default_openai_tts_output_format, + # Google + 'default_google_tts_model': default_google_tts_model, + 'default_google_tts_voice': default_google_tts_voice, + 'default_google_tts_speed': default_google_tts_speed, + # ElevenLabs + 'default_eleven_tts_model': default_eleven_tts_model, + 'default_eleven_tts_voice': default_eleven_tts_voice, + 'default_eleven_tts_language_code': default_eleven_tts_language_code, + 'default_eleven_tts_voice_stability': default_eleven_tts_voice_stability, + 'default_eleven_tts_voice_similiarity_boost': default_eleven_tts_voice_similiarity_boost, + 'default_eleven_tts_voice_style': default_eleven_tts_voice_style, + 'default_eleven_tts_voice_use_speaker_boost': default_eleven_tts_voice_use_speaker_boost, + 'default_eleven_tts_output_format': default_eleven_tts_output_format + # GPT Sovi-TTS + }, + 'search_settings': { + 'default_search_provider': search_provider_default, + 'search_language_query': search_language_query, + 
'search_language_results': search_language_results, + 'search_language_analysis': search_language_analysis, + 'search_default_max_queries': search_default_max_queries, + 'search_enable_subquery': search_enable_subquery, + 'search_enable_subquery_count_max': search_enable_subquery_count_max, + 'search_result_rerank': search_result_rerank, + 'search_result_max': search_result_max, + 'search_result_max_per_query': search_result_max_per_query, + 'search_result_blacklist': search_result_blacklist, + 'search_result_display_type': search_result_display_type, + 'search_result_display_metadata': search_result_display_metadata, + 'search_result_save_to_db': search_result_save_to_db, + 'search_result_analysis_tone': search_result_analysis_tone, + 'relevance_analysis_llm': relevance_analysis_llm, + 'final_answer_llm': final_answer_llm, + }, + 'search_engines': { + 'baidu_search_api_key': baidu_search_api_key, + 'bing_search_api_key': bing_search_api_key, + 'bing_country_code': bing_country_code, + 'bing_search_api_url': bing_search_api_url, + 'brave_search_api_key': brave_search_api_key, + 'brave_search_ai_api_key': brave_search_ai_api_key, + 'brave_country_code': brave_country_code, + 'duckduckgo_search_api_key': duckduckgo_search_api_key, + 'google_search_api_url': google_search_api_url, + 'google_search_api_key': google_search_api_key, + 'google_search_engine_id': google_search_engine_id, + 'google_simp_trad_chinese': google_simp_trad_chinese, + 'limit_google_search_to_country': limit_google_search_to_country, + 'google_search_country': google_search_country, + 'google_search_country_code': google_search_country_code, + 'google_search_filter_setting': google_filter_setting, + 'google_user_geolocation': google_user_geolocation, + 'google_ui_language': google_ui_language, + 'google_limit_search_results_to_language': google_limit_search_results_to_language, + 'google_site_search_include': google_site_search_include, + 'google_site_search_exclude': google_site_search_exclude, + 'google_sort_results_by': google_sort_results_by, + 'google_default_search_results': google_default_search_results, + 'google_safe_search': google_safe_search, + 'google_enable_site_search' : google_enable_site_search, + 'kagi_search_api_key': kagi_search_api_key, + 'searx_search_api_url': search_engine_searx_api, + 'tavily_search_api_key': tavily_search_api_key, + 'yandex_search_api_key': yandex_search_api_key, + 'yandex_search_engine_id': yandex_search_engine_id + }, + 'prompts': { + 'sub_question_generation_prompt': sub_question_generation_prompt, + 'search_result_relevance_eval_prompt': search_result_relevance_eval_prompt, + 'analyze_search_results_prompt': analyze_search_results_prompt, + }, } except Exception as e: logging.error(f"Error loading config: {str(e)}") return None -global_api_endpoints = ["anthropic", "cohere", "groq", "openai", "huggingface", "openrouter", "deepseek", "mistral", "google", "custom_openai_api", "llama", "ooba", "kobold", "tabby", "vllm", "ollama", "aphrodite"] + +global_api_endpoints = ["anthropic", "cohere", "google", "groq", "openai", "huggingface", "openrouter", "deepseek", "mistral", "custom_openai_api", "llama", "ooba", "kobold", "tabby", "vllm", "ollama", "aphrodite"] + +global_search_engines = ["baidu", "bing", "brave", "duckduckgo", "google", "kagi", "searx", "tavily", "yandex"] + +openai_tts_voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"] + # Setup Default API Endpoint -loaded_config_data = load_and_log_configs() -default_api_endpoint = loaded_config_data['default_api'] 
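Because the settings are now grouped into per-provider blocks ('openai_api', 'llama_api', 'kobold_api', and so on) rather than the earlier flat 'api_keys'/'models' maps, any caller that still indexes the old layout would fail with a KeyError. A small accessor such as the one below, which is hypothetical and not part of this patch, keeps lookups tolerant of a missing block or a failed config load.

    def get_provider_setting(cfg, provider: str, key: str, default=None):
        """Fetch one value from the per-provider blocks returned by load_and_log_configs().

        Example: get_provider_setting(loaded_config_data, 'openai', 'temperature', '0.7')
        reads cfg['openai_api']['temperature'] and falls back to '0.7' if anything is missing.
        """
        if not cfg:
            return default
        return cfg.get(f"{provider}_api", {}).get(key, default)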
+try: + loaded_config_data = load_and_log_configs() + default_api_endpoint = loaded_config_data['default_api'] + print(f"Default API Endpoint: {default_api_endpoint}") +except Exception as e: + logging.error(f"Error loading default API endpoint: {str(e)}") + default_api_endpoint = "openai" + def format_api_name(api): name_mapping = { "openai": "OpenAI", "anthropic": "Anthropic", "cohere": "Cohere", + "google": "Google", "groq": "Groq", "huggingface": "HuggingFace", "openrouter": "OpenRouter", "deepseek": "DeepSeek", "mistral": "Mistral", - "google": "Google", "custom_openai_api": "Custom-OpenAI-API", "llama": "Llama.cpp", "ooba": "Ooba", @@ -416,9 +750,6 @@ def format_api_name(api): "aphrodite": "Aphrodite" } return name_mapping.get(api, api.title()) -print(f"Default API Endpoint: {default_api_endpoint}") - - # # End of Config loading @@ -596,9 +927,6 @@ def create_download_directory(title): return session_path -import chardet -import logging - def safe_read_file(file_path): encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252', 'utf-8-sig'] @@ -725,7 +1053,6 @@ def normalize_title(title, preserve_spaces=False): return title.strip('_') - def clean_youtube_url(url): parsed_url = urlparse(url) query_params = parse_qs(parsed_url.query) @@ -820,9 +1147,6 @@ def get_db_config(): 'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200) } - - - # # End of DB Config Loading ####################################################################################################################### @@ -838,6 +1162,7 @@ def format_text_with_line_breaks(text): # Track temp files for cleanup temp_files = [] + temp_file_paths = [] def save_temp_file(file): diff --git a/App_Function_Libraries/Web_Scraping/Article_Extractor_Lib.py b/App_Function_Libraries/Web_Scraping/Article_Extractor_Lib.py index 2f2daee7d..b1dc35a2d 100644 --- a/App_Function_Libraries/Web_Scraping/Article_Extractor_Lib.py +++ b/App_Function_Libraries/Web_Scraping/Article_Extractor_Lib.py @@ -33,6 +33,8 @@ from playwright.async_api import async_playwright import requests import trafilatura +from tqdm import tqdm + # # Import Local from App_Function_Libraries.DB.DB_Manager import ingest_article_to_db @@ -42,6 +44,8 @@ # Function Definitions # +web_scraping_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ################################################################# # # Scraping-related functions: @@ -62,23 +66,35 @@ def get_page_title(url: str) -> str: async def scrape_article(url: str, custom_cookies: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]: + logging.info(f"Scraping article from URL: {url}") async def fetch_html(url: str) -> str: + logging.info(f"Fetching HTML from {url}") async with async_playwright() as p: browser = await p.chromium.launch(headless=True) - context = await browser.new_context( - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - ) - if custom_cookies: - await context.add_cookies(custom_cookies) - page = await context.new_page() - await page.goto(url) - await page.wait_for_load_state("networkidle") - content = await page.content() - await browser.close() - log_counter("html_fetched", labels={"url": url}) - return content + try: + context = await browser.new_context( + user_agent=web_scraping_user_agent, + #viewport = {"width": 1280, "height": 720}, + #java_script_enabled = True + ) + if 
custom_cookies: + await context.add_cookies(custom_cookies) + page = await context.new_page() + try: + await page.goto(url, wait_until="domcontentloaded", timeout=10000) # 10-second timeout + await page.wait_for_load_state("networkidle", timeout=10000) # 10-second timeout + content = await page.content() + logging.info(f"HTML fetched successfully from {url}") + log_counter("html_fetched", labels={"url": url}) + return content + except Exception as e: + logging.error(f"Error fetching HTML for {url}: {e}") + return "" + finally: + await browser.close() def extract_article_data(html: str, url: str) -> dict: + logging.info(f"Extracting article data from HTML for {url}") # FIXME - Add option for extracting comments/tables/images downloaded = trafilatura.extract(html, include_comments=False, include_tables=False, include_images=False) metadata = trafilatura.extract_metadata(html) @@ -93,6 +109,7 @@ def extract_article_data(html: str, url: str) -> dict: } if downloaded: + logging.info(f"Content extracted successfully from {url}") log_counter("article_extracted", labels={"success": "true", "url": url}) # Add metadata to content result['content'] = ContentMetadataHandler.format_content_with_metadata( @@ -122,6 +139,7 @@ def extract_article_data(html: str, url: str) -> dict: return result def convert_html_to_markdown(html: str) -> str: + logging.info("Converting HTML to Markdown") soup = BeautifulSoup(html, 'html.parser') for para in soup.find_all('p'): # Add a newline at the end of each paragraph for markdown separation @@ -133,6 +151,7 @@ def convert_html_to_markdown(html: str) -> str: article_data = extract_article_data(html, url) if article_data['extraction_successful']: article_data['content'] = convert_html_to_markdown(article_data['content']) + logging.info(f"Article content length: {len(article_data['content'])}") log_histogram("article_content_length", len(article_data['content']), labels={"url": url}) return article_data @@ -156,6 +175,9 @@ async def scrape_and_summarize_multiple( results = [] errors = [] + # Create a tqdm progress bar + progress_bar = tqdm(total=len(urls_list), desc="Scraping and Summarizing") + # Loop over each URL to scrape and optionally summarize for i, url in enumerate(urls_list): custom_title = custom_titles[i] if i < len(custom_titles) else None @@ -206,6 +228,12 @@ async def scrape_and_summarize_multiple( error_message = f"Error processing URL {i + 1} ({url}): {str(e)}" errors.append(error_message) logging.error(error_message, exc_info=True) + finally: + # Update the progress bar + progress_bar.update(1) + + # Close the progress bar + progress_bar.close() if errors: logging.error("\n".join(errors)) @@ -804,10 +832,10 @@ def format_content_with_metadata( metadata.update(additional_metadata) formatted_content = f"""{ContentMetadataHandler.METADATA_START} -{json.dumps(metadata, indent=2)} -{ContentMetadataHandler.METADATA_END} - -{content}""" + {json.dumps(metadata, indent=2)} + {ContentMetadataHandler.METADATA_END} + + {content}""" return formatted_content diff --git a/App_Function_Libraries/Web_Scraping/WebSearch_APIs.py b/App_Function_Libraries/Web_Scraping/WebSearch_APIs.py new file mode 100644 index 000000000..48bb4f0c0 --- /dev/null +++ b/App_Function_Libraries/Web_Scraping/WebSearch_APIs.py @@ -0,0 +1,2113 @@ +# WebSearch_APIs.py +# Description: This file contains the functions that are used for performing queries against various Search Engine APIs +# +# Imports +import asyncio +import json +import logging +from html import unescape +import pytest +import 
random +import re +import time +from typing import Optional, Dict, Any, List +from urllib.parse import urlparse, urlencode, unquote +# +# 3rd-Party Imports +import requests +from lxml.etree import _Element +from lxml.html import document_fromstring +from requests import RequestException +from requests.adapters import HTTPAdapter +from urllib3 import Retry + +from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize +# +# Local Imports +from App_Function_Libraries.Utils.Utils import loaded_config_data +from App_Function_Libraries.Web_Scraping.Article_Extractor_Lib import scrape_article +from App_Function_Libraries.Chat.Chat_Functions import chat_api_call +# +####################################################################################################################### +# +# Functions: +# 1. analyze_question +# +####################################################################################################################### +# +# Functions: + +######################### Main Orchestration Workflow ######################### +# +# FIXME - Add Logging + +def initialize_web_search_results_dict(search_params: Dict) -> Dict: + """ + Initializes and returns a dictionary for storing web search results and metadata. + + Args: + search_params (Dict): A dictionary containing search parameters. + + Returns: + Dict: A dictionary initialized with search metadata. + """ + return { + "search_engine": search_params.get('engine', 'google'), + "search_query": "", + "content_country": search_params.get('content_country', 'US'), + "search_lang": search_params.get('search_lang', 'en'), + "output_lang": search_params.get('output_lang', 'en'), + "result_count": 0, + "date_range": search_params.get('date_range'), + "safesearch": search_params.get('safesearch', 'active'), + "site_blacklist": search_params.get('site_blacklist', []), + "exactTerms": search_params.get('exactTerms'), + "excludeTerms": search_params.get('excludeTerms'), + "filter": search_params.get('filter'), + "geolocation": search_params.get('geolocation'), + "search_result_language": search_params.get('search_result_language'), + "sort_results_by": search_params.get('sort_results_by'), + "results": [], + "total_results_found": 0, + "search_time": 0.0, + "error": None, + "processing_error": None + } + + +def generate_and_search(question: str, search_params: Dict) -> Dict: + """ + Generates sub-queries (if enabled) and performs web searches for each query. + + Args: + question (str): The user's original question or query. + search_params (Dict): A dictionary containing parameters for performing web searches + and specifying LLM endpoints. + + Returns: + Dict: A dictionary containing all search results and related metadata. + + Raises: + ValueError: If the input parameters are invalid. + """ + logging.info(f"Starting generate_and_search with query: {question}") + + # Validate input parameters + if not question or not isinstance(question, str): + raise ValueError("Invalid question parameter") + if not search_params or not isinstance(search_params, dict): + raise ValueError("Invalid search_params parameter") + + # Check for required keys in search_params + required_keys = ["engine", "content_country", "search_lang", "output_lang", "result_count"] + for key in required_keys: + if key not in search_params: + raise ValueError(f"Missing required key in search_params: {key}") + + # 1. 
Generate sub-queries if requested + logging.info(f"Generating sub-queries for the query: {question}") + sub_query_dict = { + "main_goal": question, + "sub_questions": [], + "search_queries": [], + "analysis_prompt": None + } + + if search_params.get("subquery_generation", False): + logging.info("Sub-query generation enabled") + api_endpoint = search_params.get("subquery_generation_llm", "openai") + sub_query_dict = analyze_question(question, api_endpoint) + + # Merge original question with sub-queries + sub_queries = sub_query_dict.get("sub_questions", []) + logging.info(f"Sub-queries generated: {sub_queries}") + all_queries = [question] + sub_queries + + # 2. Initialize a single web_search_results_dict + web_search_results_dict = initialize_web_search_results_dict(search_params) + web_search_results_dict["search_query"] = question + + # 3. Perform searches and accumulate all raw results + for q in all_queries: + sleep_time = random.uniform(1, 1.5) # Add a random delay to avoid rate limiting + logging.info(f"Performing web search for query: {q}") + raw_results = perform_websearch( + search_engine=search_params.get('engine'), + search_query=q, + content_country=search_params.get('content_country', 'US'), + search_lang=search_params.get('search_lang', 'en'), + output_lang=search_params.get('output_lang', 'en'), + result_count=search_params.get('result_count', 10), + date_range=search_params.get('date_range'), + safesearch=search_params.get('safesearch', 'active'), + site_blacklist=search_params.get('site_blacklist', []), + exactTerms=search_params.get('exactTerms'), + excludeTerms=search_params.get('excludeTerms'), + filter=search_params.get('filter'), + geolocation=search_params.get('geolocation'), + search_result_language=search_params.get('search_result_language'), + sort_results_by=search_params.get('sort_results_by') + ) + + # Debug: Inspect raw results + logging.debug(f"Raw results for query '{q}': {raw_results}") + + # Check for errors or invalid data + if not isinstance(raw_results, dict) or raw_results.get("processing_error"): + logging.error(f"Error or invalid data returned for query '{q}': {raw_results}") + print(f"Error or invalid data returned for query '{q}': {raw_results}") + continue + + logging.info(f"Search results found for query '{q}': {len(raw_results.get('results', []))}") + + # Append results to the single web_search_results_dict + web_search_results_dict["results"].extend(raw_results["results"]) + web_search_results_dict["total_results_found"] += raw_results.get("total_results_found", 0) + web_search_results_dict["search_time"] += raw_results.get("search_time", 0.0) + logging.info(f"Total results found so far: {len(web_search_results_dict['results'])}") + + return { + "web_search_results_dict": web_search_results_dict, + "sub_query_dict": sub_query_dict + } + + +async def analyze_and_aggregate(web_search_results_dict: Dict, sub_query_dict: Dict, search_params: Dict) -> Dict: + logging.info("Starting analyze_and_aggregate") + + # 4. Score/filter results + logging.info("Scoring and filtering search results") + sub_questions = sub_query_dict.get("sub_questions", []) + relevant_results = await search_result_relevance( + web_search_results_dict["results"], + sub_query_dict["main_goal"], + sub_questions, + search_params.get('relevance_analysis_llm') + ) + # FIXME + logging.debug("Relevant results returned by search_result_relevance:") + logging.debug(json.dumps(relevant_results, indent=2)) + + # 5. 
Allow user to review and select relevant results (if enabled) + logging.info("Reviewing and selecting relevant results") + if search_params.get("user_review", False): + logging.info("User review enabled") + relevant_results = review_and_select_results({"results": list(relevant_results.values())}) + + # 6. Summarize/aggregate final answer + final_answer = aggregate_results( + relevant_results, + sub_query_dict["main_goal"], + sub_questions, + search_params.get('final_answer_llm') + ) + + # 7. Return the final data + logging.info("Returning final websearch results") + return { + "final_answer": final_answer, + "relevant_results": relevant_results, + "web_search_results_dict": web_search_results_dict + } + + +@pytest.mark.asyncio +async def test_perplexity_pipeline(): + # Phase 1: Generate sub-queries and perform web searches + search_params = { + "engine": "google", + "content_country": "countryUS", + "search_lang": "en", + "output_lang": "en", + "result_count": 10, + "date_range": None, + "safesearch": "active", + "site_blacklist": ["spam-site.com"], + "exactTerms": None, + "excludeTerms": None, + "filter": None, + "geolocation": None, + "search_result_language": None, + "sort_results_by": None, + "subquery_generation": True, + "subquery_generation_llm": "openai", + "relevance_analysis_llm": "openai", + "final_answer_llm": "openai" + } + phase1_results = generate_and_search("What is the capital of France?", search_params) + # Review the results here if needed + # Phase 2: Analyze relevance and aggregate final answer + phase2_results = await analyze_and_aggregate(phase1_results["web_search_results_dict"], phase1_results["sub_query_dict"], search_params) + print(phase2_results["final_answer"]) + + +######################### Question Analysis ######################### +# +# +def analyze_question(question: str, api_endpoint) -> Dict: + logging.debug(f"Analyzing question: {question} with API endpoint: {api_endpoint}") + """ + Analyzes the input question and generates sub-questions + + Returns: + Dict containing: + - main_goal: str + - sub_questions: List[str] + - search_queries: List[str] + - analysis_prompt: str + """ + original_query = question + sub_question_generation_prompt = f""" + You are an AI assistant that helps generate search queries. Given an original query, suggest alternative search queries that could help find relevant information. Your goal is to generate queries that are diverse, specific, and highly relevant to the original query, ensuring comprehensive coverage of the topic. + + Important instructions: + 1. Generate between 2 and 6 queries unless a fixed count is specified. Generate more queries for complex or multifaceted topics and fewer for simple or straightforward ones. + 2. Ensure the queries are diverse, covering different aspects or perspectives of the original query, while remaining highly relevant to its core intent. + 3. Prefer specific queries over general ones, as they are more likely to yield targeted and useful results. + 4. If the query involves comparing two topics, generate separate queries for each topic. + 5. If previous queries and an answer are provided, generate new queries that address the shortcomings of the previous answer and avoid repeating the previous queries. + 6. If the original query is broad or ambiguous, generate queries that explore specific subtopics or clarify the intent. + 7. If the query is too specific or unclear, generate queries that explore related or broader topics to ensure useful results. + 8. 
Return the queries as a JSON array in the format ["query_1", "query_2", ...].
+
+    Examples:
+    1. For the query "What are the benefits of exercise?", generate queries like:
+    ["health benefits of physical activity", "mental health benefits of exercise", "long-term effects of regular exercise", "how exercise improves cardiovascular health", "role of exercise in weight management"]
+
+    2. For the query "Compare Python and JavaScript", generate queries like:
+    ["key features of Python programming language", "advantages of JavaScript for web development", "use cases for Python vs JavaScript", "performance comparison of Python and JavaScript", "ease of learning Python vs JavaScript"]
+
+    3. For the query "How does climate change affect biodiversity?", generate queries like:
+    ["impact of climate change on species extinction", "effects of global warming on ecosystems", "role of climate change in habitat loss", "how rising temperatures affect marine biodiversity", "climate change and its impact on migratory patterns"]
+
+    4. For the query "Best practices for remote work", generate queries like:
+    ["tips for staying productive while working from home", "how to maintain work-life balance in remote work", "tools for effective remote team collaboration", "managing communication in remote teams", "ergonomic setup for home offices"]
+
+    5. For the query "What is quantum computing?", generate queries like:
+    ["basic principles of quantum computing", "applications of quantum computing in real-world problems", "difference between classical and quantum computing", "key challenges in developing quantum computers", "future prospects of quantum computing"]
+
+    Original query: {original_query}
+    """
+
+    input_data = "Follow the above instructions."
+
+    sub_questions: List[str] = []
+    for attempt in range(3):
+        try:
+            logging.info(f"Generating sub-questions (attempt {attempt + 1})")
+
+            response = chat_api_call(api_endpoint, None, input_data, sub_question_generation_prompt, temp=0.7, system_message=None, streaming=False, minp=None, maxp=None, model=None)
+            if response:
+                try:
+                    # Try to parse as JSON first. The prompt asks for a bare JSON array,
+                    # but also accept an object that wraps the list in a "sub_questions" key.
+                    parsed_response = json.loads(response)
+                    if isinstance(parsed_response, list):
+                        sub_questions = [str(q) for q in parsed_response]
+                    elif isinstance(parsed_response, dict):
+                        sub_questions = parsed_response.get("sub_questions", [])
+                    else:
+                        sub_questions = []
+                    if sub_questions:
+                        logging.info("Successfully generated sub-questions from JSON")
+                        break
+                except json.JSONDecodeError:
+                    # If JSON parsing fails, attempt a regex-based fallback
+                    logging.warning("Failed to parse as JSON. Attempting regex extraction.")
+                    matches = re.findall(r'"([^"]*)"', response)
+                    sub_questions = matches if matches else []
+                    if sub_questions:
+                        logging.info("Successfully extracted sub-questions using regex")
+                        break
+
+        except Exception as e:
+            logging.error(f"Error generating sub-questions: {str(e)}")
+
+    if not sub_questions:
+        logging.error("Failed to extract sub-questions from API response after all attempts.")
+        sub_questions = [original_query] # Fallback to the original query
+
+    # Construct and return the result dictionary
+    logging.info("Sub-questions generated successfully")
+    return {
+        "main_goal": original_query,
+        "sub_questions": sub_questions,
+        "search_queries": sub_questions,
+        "analysis_prompt": sub_question_generation_prompt
+    }
+
+
+######################### Relevance Analysis #########################
+#
+# FIXME - Ensure edge cases are handled properly / Structured outputs? 
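+#
+# For reference, a rough sketch of the per-result dict this section expects from
+# process_web_search_results() (field values are purely illustrative):
+#
+#   {
+#       "title": "Paris - Wikipedia",
+#       "url": "https://en.wikipedia.org/wiki/Paris",
+#       "content": "Paris is the capital and most populous city of France...",
+#       "metadata": {"source": "en.wikipedia.org", "snippet": "...", "relevance_score": None}
+#   }
+#
+# search_result_relevance() below only relies on "content", "url" and (optionally) "id".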
+async def search_result_relevance( + search_results: List[Dict], + original_question: str, + sub_questions: List[str], + api_endpoint: str +) -> Dict[str, Dict]: + """ + Evaluate whether each search result is relevant to the original question and sub-questions. + + Args: + search_results (List[Dict]): List of search results to evaluate. + original_question (str): The original question posed by the user. + sub_questions (List[str]): List of sub-questions generated from the original question. + api_endpoint (str): The LLM or API endpoint to use for relevance analysis. + + Returns: + Dict[str, Dict]: A dictionary of relevant results, keyed by a unique ID or index. + """ + relevant_results = {} + + # Summarization prompt template + summarization_prompt = """ + Summarize the following text in a concise way that captures the key information relevant to this question: "{question}" + + Text to summarize: + {content} + + Instructions: + 1. Focus on information relevant to the question + 2. Keep the summary under 2000 characters + 3. Maintain factual accuracy + 4. Include key details and statistics if present + """ + + for idx, result in enumerate(search_results): + content = result.get("content", "") + if not content: + logging.error("No Content found in search results array!") + continue + + # First, evaluate relevance + eval_prompt = f""" + Given the following search results for the user's question: "{original_question}" and the generated sub-questions: {sub_questions}, evaluate the relevance of the search result to the user's question. + Explain your reasoning for selection. + + Search Results: + {content} + + Instructions: + 1. You MUST only answer TRUE or False while providing your reasoning for your answer. + 2. A result is relevant if the result most likely contains comprehensive and relevant information to answer the user's question. + 3. Provide a brief reason for selection. + + You MUST respond using EXACTLY this format and nothing else: + + Selected Answer: [True or False] + Reasoning: [Your reasoning for the selections] + """ + input_data = "Evaluate the relevance of the search result." 
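+
+        # The regex parsing further down assumes the model replies in exactly the
+        # two-line format requested above, e.g. (hypothetical model output):
+        #   Selected Answer: True
+        #   Reasoning: The page directly states the capital of France.
+        # Any other shape falls through to the "Failed to parse" warning below.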
+ + try: + # Add delay to avoid rate limiting + sleep_time = random.uniform(0.2, 0.6) + await asyncio.sleep(sleep_time) + + # Evaluate relevance + relevancy_result = chat_api_call( + api_endpoint=api_endpoint, + api_key=None, + input_data=input_data, + prompt=eval_prompt, + temp=0.7, + system_message=None, + streaming=False + ) + + # FIXME + logging.debug(f"[DEBUG] Relevancy LLM response for index {idx}:\n{relevancy_result}\n---") + + if relevancy_result: + # Extract the selected answer and reasoning via regex + logging.debug(f"LLM Relevancy Response for item:", relevancy_result) + selected_answer_match = re.search( + r"Selected Answer:\s*(True|False)", + relevancy_result, + re.IGNORECASE + ) + reasoning_match = re.search( + r"Reasoning:\s*(.+)", + relevancy_result, + re.IGNORECASE + ) + + if selected_answer_match and reasoning_match: + is_relevant = selected_answer_match.group(1).strip().lower() == "true" + reasoning = reasoning_match.group(1).strip() + + if is_relevant: + logging.debug("Relevant result found.") + # Use the 'id' from the result if available, otherwise use idx + result_id = result.get("id", str(idx)) + # Scrape the content of the relevant result + scraped_content = await scrape_article(result['url']) + + # Create Summarization prompt + logging.debug(f"Creating Summarization Prompt for result idx={idx}") + summary_prompt = summarization_prompt.format( + question=original_question, + content=scraped_content['content'] + ) + + # Add delay before summarization + await asyncio.sleep(sleep_time) + + # Generate summary using the summarize function + logging.info(f"Summarizing relevant result: ID={result_id}") + summary = summarize( + input_data=scraped_content['content'], + custom_prompt_arg=summary_prompt, + api_name=api_endpoint, + api_key=None, + temp=0.7, + system_message=None, + streaming=False + ) + + relevant_results[result_id] = { + "content": summary, # Store the summary instead of full content + "original_content": scraped_content['content'], # Keep original content if needed + "reasoning": reasoning + } + logging.info(f"Relevant result found and summarized: ID={result_id}; Reasoning={reasoning}") + else: + logging.info(f"Irrelevant result: {reasoning}") + + else: + logging.warning("Failed to parse the API response for relevance analysis.") + except Exception as e: + logging.error(f"Error during relevance evaluation/summarization for result idx={idx}: {e}") + + return relevant_results + + +def review_and_select_results(web_search_results_dict: Dict) -> Dict: + """ + Allows the user to review and select relevant results from the search results. + + Args: + web_search_results_dict (Dict): The dictionary containing all search results. + + Returns: + Dict: A dictionary containing only the user-selected relevant results. + """ + relevant_results = {} + print("Review the search results and select the relevant ones:") + for idx, result in enumerate(web_search_results_dict["results"]): + print(f"\nResult {idx + 1}:") + print(f"Title: {result['title']}") + print(f"URL: {result['url']}") + print(f"Content: {result['content'][:200]}...") # Show a preview of the content + user_input = input("Is this result relevant? 
(y/n): ").strip().lower() + if user_input == 'y': + relevant_results[str(idx)] = result + + return relevant_results + + +######################### Result Aggregation & Combination ######################### +# +def aggregate_results( + relevant_results: Dict[str, Dict], + question: str, + sub_questions: List[str], + api_endpoint: str +) -> Dict: + """ + Combines and summarizes relevant results into a final answer. + + Args: + relevant_results (Dict[str, Dict]): Dictionary of relevant articles/content. + question (str): Original question. + sub_questions (List[str]): List of sub-questions. + api_endpoint (str): LLM or API endpoint for summarization. + + Returns: + Dict containing: + - summary (str): Final summarized answer. + - evidence (List[Dict]): List of relevant content items included in the summary. + - confidence (float): A rough confidence score (placeholder). + """ + logging.info("Aggregating and summarizing relevant results") + if not relevant_results: + return { + "Report": "No relevant results found. Unable to provide an answer.", + "evidence": [], + "confidence": 0.0 + } + + # FIXME - Add summarization loop + logging.info("Summarizing relevant results") + # ADD Code here to summarize the relevant results + + + # FIXME - Validate and test thoroughly, also structured generation + # Concatenate relevant contents for final analysis + concatenated_texts = "\n\n".join( + f"ID: {rid}\nContent: {res['content']}\nReasoning: {res['reasoning']}" + for rid, res in relevant_results.items() + ) + + current_date = time.strftime("%Y-%m-%d") + + # Aggregation Prompt #1 + analyze_search_results_prompt_1 = f""" + Generate a comprehensive, well-structured, and informative answer for a given question, + using ONLY the information found in the provided web Search Results (URL, Page Title, Summary). + Use an unbiased, journalistic tone, adapting the level of formality to match the user’s question. + + • Cite your statements using [number] notation, placing citations at the end of the relevant sentence. + • Only cite the most relevant results. If multiple sources support the same point, cite all relevant sources [e.g., 1, 2, 3]. + • If sources conflict, present both perspectives clearly and cite the respective sources. + • If different sources refer to different entities with the same name, provide separate answers. + • Do not add any external or fabricated information. + • Do not include URLs or a reference section; cite inline with [number] format only. + • Do not repeat the question or include unnecessary redundancy. + • Use markdown formatting (e.g., **bold**, bullet points, ## headings) to organize the information. + • If the provided results are insufficient to answer the question, explicitly state what information is missing or unclear. + + Structure your answer like this: + 1. **Short introduction**: Briefly summarize the topic (1–2 sentences). + 2. **Bulleted points**: Present key details, each with appropriate citations. + 3. **Conclusion**: Summarize the findings or restate the core answer (with citations if needed). + + Example: + 1. **Short introduction**: This topic explores the impact of climate change on agriculture. + 2. **Bulleted points**: + - Rising temperatures have reduced crop yields in some regions [1]. + - Changes in rainfall patterns are affecting irrigation practices [2, 3]. + 3. **Conclusion**: Climate change poses significant challenges to global agriculture [1, 2, 3]. 
+ + + {concatenated_texts} + + --------------------- + + Make sure to match the language of the user's question. + + Question: {question} + Answer (in the language of the user's question): + """ + + # Aggregation Prompt #2 + analyze_search_results_prompt_2 = f"""INITIAL_QUERY: Here are some sources {concatenated_texts}. Read these carefully, as you will be asked a Query about them. + # General Instructions + + Write an accurate, detailed, and comprehensive response to the user's query located at INITIAL_QUERY. Additional context is provided as "USER_INPUT" after specific questions. Your answer should be informed by the provided "Search results". Your answer must be precise, of high-quality, and written by an expert using an unbiased and journalistic tone. Your answer must be written in the same language as the query, even if language preference is different. + + You MUST cite the most relevant search results that answer the query. Do not mention any irrelevant results. You MUST ADHERE to the following instructions for citing search results: + - to cite a search result, enclose its index located above the summary with brackets at the end of the corresponding sentence, for example "Ice is less dense than water[1][2]." or "Paris is the capital of France[1][4][5]." + - NO SPACE between the last word and the citation, and ALWAYS use brackets. Only use this format to cite search results. NEVER include a References section at the end of your answer. + - If you don't know the answer or the premise is incorrect, explain why. + If the search results are empty or unhelpful, answer the query as well as you can with existing knowledge. + + You MUST NEVER use moralization or hedging language. AVOID using the following phrases: + - "It is important to ..." + - "It is inappropriate ..." + - "It is subjective ..." + + You MUST ADHERE to the following formatting instructions: + - Use markdown to format paragraphs, lists, tables, and quotes whenever possible. + - Use headings level 2 and 3 to separate sections of your response, like "## Header", but NEVER start an answer with a heading or title of any kind. + - Use single new lines for lists and double new lines for paragraphs. + - Use markdown to render images given in the search results. + - NEVER write URLs or links. + + # Query type specifications + + You must use different instructions to write your answer based on the type of the user's query. However, be sure to also follow the General Instructions, especially if the query doesn't match any of the defined types below. Here are the supported types. + + ## Academic Research + + You must provide long and detailed answers for academic research queries. Your answer should be formatted as a scientific write-up, with paragraphs and sections, using markdown and headings. + + ## Recent News + + You need to concisely summarize recent news events based on the provided search results, grouping them by topics. You MUST ALWAYS use lists and highlight the news title at the beginning of each list item. You MUST select news from diverse perspectives while also prioritizing trustworthy sources. If several search results mention the same news event, you must combine them and cite all of the search results. Prioritize more recent events, ensuring to compare timestamps. You MUST NEVER start your answer with a heading of any kind. + + ## Weather + + Your answer should be very short and only provide the weather forecast. 
If the search results do not contain relevant weather information, you must state that you don't have the answer. + + ## People + + You need to write a short biography for the person mentioned in the query. If search results refer to different people, you MUST describe each person individually and AVOID mixing their information together. NEVER start your answer with the person's name as a header. + + ## Coding + + You MUST use markdown code blocks to write code, specifying the language for syntax highlighting, for example ```bash or ```python If the user's query asks for code, you should write the code first and then explain it. + + ## Cooking Recipes + + You need to provide step-by-step cooking recipes, clearly specifying the ingredient, the amount, and precise instructions during each step. + + ## Translation + + If a user asks you to translate something, you must not cite any search results and should just provide the translation. + + ## Creative Writing + + If the query requires creative writing, you DO NOT need to use or cite search results, and you may ignore General Instructions pertaining only to search. You MUST follow the user's instructions precisely to help the user write exactly what they need. + + ## Science and Math + + If the user query is about some simple calculation, only answer with the final result. Follow these rules for writing formulas: + - Always use \( and\) for inline formulas and\[ and\] for blocks, for example\(x^4 = x - 3 \) + - To cite a formula add citations to the end, for example\[ \sin(x) \] [1][2] or \(x^2-2\) [4]. + - Never use $ or $$ to render LaTeX, even if it is present in the user query. + - Never use unicode to render math expressions, ALWAYS use LaTeX. + - Never use the \label instruction for LaTeX. + + ## URL Lookup + + When the user's query includes a URL, you must rely solely on information from the corresponding search result. DO NOT cite other search results, ALWAYS cite the first result, e.g. you need to end with [1]. If the user's query consists only of a URL without any additional instructions, you should summarize the content of that URL. + + ## Shopping + + If the user query is about shopping for a product, you MUST follow these rules: + - Organize the products into distinct sectors. For example, you could group shoes by style (boots, sneakers, etc.) + - Cite at most 9 search results using the format provided in General Instructions to avoid overwhelming the user with too many options. + + The current date is: {current_date} + + The user's query is: {question} + """ + + input_data = "Follow the above instructions." 
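+
+    # Note: only analyze_search_results_prompt_2 is sent to the LLM below;
+    # analyze_search_results_prompt_1 is kept as an alternative report style.
+    # On success the call returns a dict shaped roughly like (values illustrative):
+    #     {"Report": "<markdown answer with [n] citations>",
+    #      "evidence": [...relevant_results values...],
+    #      "confidence": 0.9}
+    # while the error path further down falls back to a "summary" key instead of "Report".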
+ + try: + logging.info("Generating the report") + returned_response = chat_api_call( + api_endpoint=api_endpoint, + api_key=None, + input_data=input_data, + prompt=analyze_search_results_prompt_2, + temp=0.7, + system_message=None, + streaming=False + ) + logging.debug(f"Returned response from LLM: {returned_response}") + if returned_response: + # You could do further parsing or confidence estimation here + return { + "Report": returned_response, + "evidence": list(relevant_results.values()), + "confidence": 0.9 # Hardcoded or computed as needed + } + except Exception as e: + logging.error(f"Error aggregating results: {e}") + + logging.error("Could not create the report due to an error.") + return { + "summary": "Could not create the report due to an error.", + "evidence": list(relevant_results.values()), + "confidence": 0.0 + } + +# +# End of Orchestration functions +####################################################################################################################### + + +####################################################################################################################### +# +# Search Engine Functions + +# FIXME +def perform_websearch(search_engine, search_query, content_country, search_lang, output_lang, result_count, date_range=None, + safesearch=None, site_blacklist=None, exactTerms=None, excludeTerms=None, filter=None, geolocation=None, search_result_language=None, sort_results_by=None): + try: + if search_engine.lower() == "baidu": + web_search_results = search_web_baidu(search_query, None, None) + + elif search_engine.lower() == "bing": + # Prepare the arguments for search_web_bing + bing_args = { + "search_query": search_query, + "bing_lang": search_lang, + "bing_country": content_country, + "result_count": result_count, + "bing_api_key": loaded_config_data['search_engines'].get('bing_api_key'), # Fetch Bing API key from config + "date_range": date_range, + } + + # Call the search_web_bing function with the prepared arguments + web_search_results = search_web_bing(**bing_args) + + elif search_engine.lower() == "brave": + web_search_results = search_web_brave(search_query, content_country, search_lang, output_lang, result_count, safesearch, + site_blacklist, date_range) + + elif search_engine.lower() == "duckduckgo": + # Prepare the arguments for search_web_duckduckgo + ddg_args = { + "keywords": search_query, + "region": f"{content_country.lower()}-{search_lang.lower()}", # Format: "us-en" + "timelimit": date_range[0] if date_range else None, # Use first character of date_range (e.g., "y" -> "y") + "max_results": result_count, + } + + # Call the search_web_duckduckgo function with the prepared arguments + ddg_results = search_web_duckduckgo(**ddg_args) + + # Wrap the results in a dictionary to match the expected format + web_search_results = {"results": ddg_results} + + elif search_engine.lower() == "google": + # Convert site_blacklist list to a comma-separated string + if site_blacklist and isinstance(site_blacklist, list): + site_blacklist = ",".join(site_blacklist) + + # Prepare the arguments for search_web_google + google_args = { + "search_query": search_query, + "google_search_api_key": loaded_config_data['search_engines']['google_search_api_key'], + "google_search_engine_id": loaded_config_data['search_engines']['google_search_engine_id'], + "result_count": result_count, + "c2coff": "1", # Default value + "results_origin_country": content_country, + "ui_language": output_lang, + "search_result_language": search_result_language or 
"lang_en", # Default value + "geolocation": geolocation or "us", # Default value + "safesearch": safesearch or "off", # Default value, + } + + # If site_blacklist has multiple domains, do not use siteSearch + if site_blacklist and len(site_blacklist) == 1: + google_args["siteSearch"] = site_blacklist[0] + google_args["siteSearchFilter"] = "e" + else: + # Do not use siteSearch for multiple domains + # Either skip it entirely or see Option 2 below + google_args.pop("siteSearch", None) + google_args.pop("siteSearchFilter", None) + + # Add optional parameters only if they are provided + if date_range: + google_args["date_range"] = date_range + if exactTerms: + google_args["exactTerms"] = exactTerms + if excludeTerms: + google_args["excludeTerms"] = excludeTerms + if filter: + google_args["filter"] = filter + if site_blacklist: + google_args["site_blacklist"] = site_blacklist + if sort_results_by: + google_args["sort_results_by"] = sort_results_by + + # Call the search_web_google function with the prepared arguments + web_search_results = search_web_google(**google_args) # raw JSON + web_search_results_dict = process_web_search_results(web_search_results, "google") + return web_search_results_dict + + elif search_engine.lower() == "kagi": + web_search_results = search_web_kagi(search_query, content_country) + + elif search_engine.lower() == "serper": + web_search_results = search_web_serper() + + elif search_engine.lower() == "tavily": + web_search_results = search_web_tavily(search_query, result_count, site_blacklist) + + elif search_engine.lower() == "searx": + web_search_results = search_web_searx(search_query, language='auto', time_range='', safesearch=0, pageno=1, categories='general') + + elif search_engine.lower() == "yandex": + web_search_results = search_web_yandex() + + else: + return f"Error: Invalid Search Engine Name {search_engine}" + + # Process the raw search results + web_search_results_dict = process_web_search_results(web_search_results, search_engine) + # FIXME + #logging.debug("After process_web_search_results:") + #logging.debug(json.dumps(web_search_results_dict, indent=2)) + return web_search_results_dict + + except Exception as e: + return {"processing_error": f"Error performing web search: {str(e)}"} + + +def test_perform_websearch_google(): + # Google Searches + try: + test_1 = perform_websearch("google", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 1: {test_1}") + # FIXME - Fails. 
Need to fix arg formatting + test_2 = perform_websearch("google", "What is the capital of France?", "US", "en", "en", 10, date_range="y", safesearch="active", site_blacklist=["spam-site.com"]) + print(f"Test 2: {test_2}") + test_3 = results = perform_websearch("google", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 3: {test_3}") + except Exception as e: + print(f"Error performing google searches: {str(e)}") + pass + + +def test_perform_websearch_bing(): + # Bing Searches + try: + test_4 = perform_websearch("bing", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 4: {test_4}") + test_5 = perform_websearch("bing", "What is the capital of France?", "US", "en", "en", 10, date_range="y") + print(f"Test 5: {test_5}") + except Exception as e: + print(f"Error performing bing searches: {str(e)}") + + +def test_perform_websearch_brave(): + # Brave Searches + try: + test_7 = perform_websearch("brave", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 7: {test_7}") + except Exception as e: + print(f"Error performing brave searches: {str(e)}") + + +def test_perform_websearch_ddg(): + # DuckDuckGo Searches + try: + test_6 = perform_websearch("duckduckgo", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 6: {test_6}") + test_7 = perform_websearch("duckduckgo", "What is the capital of France?", "US", "en", "en", 10, date_range="y") + print(f"Test 7: {test_7}") + except Exception as e: + print(f"Error performing duckduckgo searches: {str(e)}") + + +# FIXME +def test_perform_websearch_kagi(): + # Kagi Searches + try: + test_8 = perform_websearch("kagi", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 8: {test_8}") + except Exception as e: + print(f"Error performing kagi searches: {str(e)}") + +# FIXME +def test_perform_websearch_serper(): + # Serper Searches + try: + test_9 = perform_websearch("serper", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 9: {test_9}") + except Exception as e: + print(f"Error performing serper searches: {str(e)}") + +# FIXME +def test_perform_websearch_tavily(): + # Tavily Searches + try: + test_10 = perform_websearch("tavily", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 10: {test_10}") + except Exception as e: + print(f"Error performing tavily searches: {str(e)}") + + +# FIXME +def test_perform_websearch_searx(): + # Searx Searches + try: + test_11 = perform_websearch("searx", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 11: {test_11}") + except Exception as e: + print(f"Error performing searx searches: {str(e)}") + + +# FIXME +def test_perform_websearch_yandex(): + #Yandex Searches + try: + test_12 = perform_websearch("yandex", "What is the capital of France?", "US", "en", "en", 10) + print(f"Test 12: {test_12}") + except Exception as e: + print(f"Error performing yandex searches: {str(e)}") + pass + +# +######################### Search Result Parsing ################################################################## +# + +def process_web_search_results(search_results: Dict, search_engine: str) -> Dict: + """ + Processes search results from a search engine and formats them into a standardized dictionary structure. + + Args: + search_results (Dict): The raw search results from the search engine. + search_engine (str): The name of the search engine (e.g., "Google", "Bing"). + + Returns: + Dict: A dictionary containing the processed search results in the specified structure. 
+ + web_search_results_dict = { + "search_engine": search_engine, + "search_query": search_results.get("search_query", ""), + "content_country": search_results.get("content_country", ""), + "search_lang": search_results.get("search_lang", ""), + "output_lang": search_results.get("output_lang", ""), + "result_count": search_results.get("result_count", 0), + "date_range": search_results.get("date_range", None), + "safesearch": search_results.get("safesearch", None), + "site_blacklist": search_results.get("site_blacklist", None), + "exactTerms": search_results.get("exactTerms", None), + "excludeTerms": search_results.get("excludeTerms", None), + "filter": search_results.get("filter", None), + "geolocation": search_results.get("geolocation", None), + "search_result_language": search_results.get("search_result_language", None), + "sort_results_by": search_results.get("sort_results_by", None), + "results": [ + { + "title": str, + "url": str, + "content": str, + "metadata": { + "date_published": Optional[str], + "author": Optional[str], + "source": Optional[str], + "language": Optional[str], + "relevance_score": Optional[float], + "snippet": Optional[str] + } + }, + "total_results_found": search_results.get("total_results_found", 0), + "search_time": search_results.get("search_time", 0.0), + "error": search_results.get("error", None), + "processing_error": None + } + """ + # Validate input parameters + if not isinstance(search_results, dict): + raise TypeError("search_results must be a dictionary") + + # Initialize the output dictionary with default values + web_search_results_dict = { + "search_engine": search_engine, + "search_query": search_results.get("search_query", ""), + "content_country": search_results.get("content_country", ""), + "search_lang": search_results.get("search_lang", ""), + "output_lang": search_results.get("output_lang", ""), + "result_count": search_results.get("result_count", 0), + "date_range": search_results.get("date_range", None), + "safesearch": search_results.get("safesearch", None), + "site_blacklist": search_results.get("site_blacklist", None), + "exactTerms": search_results.get("exactTerms", None), + "excludeTerms": search_results.get("excludeTerms", None), + "filter": search_results.get("filter", None), + "geolocation": search_results.get("geolocation", None), + "search_result_language": search_results.get("search_result_language", None), + "sort_results_by": search_results.get("sort_results_by", None), + "results": [], + "total_results_found": search_results.get("total_results_found", 0), + "search_time": search_results.get("search_time", 0.0), + "error": search_results.get("error", None), + "processing_error": None + } + try: + # Parse results based on the search engine + if search_engine.lower() == "baidu": + pass # Placeholder for Baidu-specific parsing + elif search_engine.lower() == "bing": + parsed_results = parse_bing_results(search_results, web_search_results_dict) + elif search_engine.lower() == "brave": + parsed_results = parse_brave_results(search_results, web_search_results_dict) + elif search_engine.lower() == "duckduckgo": + parsed_results = parse_duckduckgo_results(search_results, web_search_results_dict) + elif search_engine.lower() == "google": + parsed_results = parse_google_results(search_results, web_search_results_dict) + elif search_engine.lower() == "kagi": + parsed_results = parse_kagi_results(search_results, web_search_results_dict) + elif search_engine.lower() == "serper": + parsed_results = parse_serper_results(search_results, 
web_search_results_dict) + elif search_engine.lower() == "tavily": + parsed_results = parse_tavily_results(search_results, web_search_results_dict) + elif search_engine.lower() == "searx": + parsed_results = parse_searx_results(search_results, web_search_results_dict) + elif search_engine.lower() == "yandex": + parsed_results = parse_yandex_results(search_results, web_search_results_dict) + else: + raise ValueError(f"Error: Invalid Search Engine Name {search_engine}") + + except Exception as e: + web_search_results_dict["processing_error"] = f"Error processing search results: {str(e)}" + logging.error(f"Error in process_web_search_results: {str(e)}") + + return web_search_results_dict + + +def parse_html_search_results_generic(soup): + results = [] + for result in soup.find_all('div', class_='result'): + title = result.find('h3').text if result.find('h3') else '' + url = result.find('a', class_='url')['href'] if result.find('a', class_='url') else '' + content = result.find('p', class_='content').text if result.find('p', class_='content') else '' + published_date = result.find('span', class_='published_date').text if result.find('span', + class_='published_date') else '' + + results.append({ + 'title': title, + 'url': url, + 'content': content, + 'publishedDate': published_date + }) + return results + + +######################### Baidu Search ######################### +# +# https://cloud.baidu.com/doc/APIGUIDE/s/Xk1myz05f +# https://oxylabs.io/blog/how-to-scrape-baidu-search-results +def search_web_baidu(arg1, arg2, arg3): + pass + + +def test_baidu_search(arg1, arg2, arg3): + result = search_web_baidu(arg1, arg2, arg3) + return result + +def search_parse_baidu_results(): + pass + + +######################### Bing Search ######################### +# +# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/overview0 +# https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/overview +# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python +# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/query-parameters +# Country/Language code: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes#country-codes +# https://github.com/Azure-Samples/cognitive-services-REST-api-samples/tree/master/python/Search +def search_web_bing(search_query, bing_lang, bing_country, result_count=None, bing_api_key=None, + date_range=None): + # Load Search API URL from config file + search_url = loaded_config_data['search_engines']['bing_search_api_url'] + + if not bing_api_key: + # load key from config file + bing_api_key = loaded_config_data['search_engines']['bing_search_api_key'] + if not bing_api_key: + raise ValueError("Please Configure a valid Bing Search API key") + + if not result_count: + # Perform check in config file for default search result count + answer_count = loaded_config_data['search_engines']['search_result_max'] + else: + answer_count = result_count + + # date_range = "day", "week", "month", or `YYYY-MM-DD..YYYY-MM-DD` + if not date_range: + date_range = None + + # Language settings + if not bing_lang: + # do config check for default search language + setlang = bing_lang + + # Returns content for this Country market code + if not bing_country: + # do config check for default search country + bing_country = loaded_config_data['search_engines']['bing_country_code'] + else: + setcountry = bing_country + # Construct a request + mkt = 'en-US' + params = {'q': search_query, 
'mkt': mkt} +# params = {"q": search_query, "mkt": bing_country, "textDecorations": True, "textFormat": "HTML", "count": answer_count, +# "freshness": date_range, "promote": "webpages", "safeSearch": "Moderate"} + headers = {'Ocp-Apim-Subscription-Key': bing_api_key} + + # Call the API + try: + response = requests.get(search_url, headers=headers, params=params) + response.raise_for_status() + + logging.debug("Headers: ") + logging.debug(response.headers) + + logging.debug("JSON Response: ") + logging.debug(response.json()) + bing_search_results = response.json() + return bing_search_results + except Exception as ex: + raise ex + + +def test_search_web_bing(): + search_query = "How can I get started learning machine learning?" + bing_lang = "en" + bing_country = "US" + result_count = 10 + bing_api_key = None + date_range = None + result = search_web_bing(search_query, bing_lang, bing_country, result_count, bing_api_key, date_range) + # Unparsed results + print("Bing Search Results:") + print(result) + # Parsed results + output_dict = {"results": []} + parse_bing_results(result, output_dict) + print("Parsed Bing Results:") + print(json.dumps(output_dict, indent=2)) + + +def parse_bing_results(raw_results: Dict, output_dict: Dict) -> None: + """ + Parse Bing search results and update the output dictionary + + Args: + raw_results (Dict): Raw Bing API response + output_dict (Dict): Dictionary to store processed results + """ + logging.info(f"Raw Bing results received: {json.dumps(raw_results, indent=2)}") + try: + # Initialize results list if not present + if "results" not in output_dict: + output_dict["results"] = [] + + # Extract web pages results + if "webPages" in raw_results: + web_pages = raw_results["webPages"] + output_dict["total_results_found"] = web_pages.get("totalEstimatedMatches", 0) + + for result in web_pages.get("value", []): + processed_result = { + "title": result.get("name", ""), + "url": result.get("url", ""), + "content": result.get("snippet", ""), + "metadata": { + "date_published": None, # Bing doesn't typically provide this + "author": None, # Bing doesn't typically provide this + "source": result.get("displayUrl", None), + "language": None, # Could be extracted from result.get("language") if available + "relevance_score": None, # Could be calculated from result.get("rank") if available + "snippet": result.get("snippet", None) + } + } + output_dict["results"].append(processed_result) + + # Optionally process other result types + if "news" in raw_results: + for news_item in raw_results["news"].get("value", []): + processed_result = { + "title": news_item.get("name", ""), + "url": news_item.get("url", ""), + "content": news_item.get("description", ""), + "metadata": { + "date_published": news_item.get("datePublished", None), + "author": news_item.get("provider", [{}])[0].get("name", None), + "source": news_item.get("provider", [{}])[0].get("name", None), + "language": None, + "relevance_score": None, + "snippet": news_item.get("description", None) + } + } + output_dict["results"].append(processed_result) + + # Add spell suggestions if available + if "spellSuggestion" in raw_results: + output_dict["spell_suggestions"] = raw_results["spellSuggestion"] + + # Add related searches if available + if "relatedSearches" in raw_results: + output_dict["related_searches"] = [ + item.get("text", "") + for item in raw_results["relatedSearches"].get("value", []) + ] + + except Exception as e: + logging.error(f"Error processing Bing results: {str(e)}") + output_dict["processing_error"] = 
f"Error processing Bing results: {str(e)}" + + +######################### Brave Search ######################### +# +# https://brave.com/search/api/ +# https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-brave-search/README.md +def search_web_brave(search_term, country, search_lang, ui_lang, result_count, safesearch="moderate", + brave_api_key=None, result_filter=None, search_type="ai", date_range=None): + search_url = "https://api.search.brave.com/res/v1/web/search" + if not brave_api_key and search_type == "web": + # load key from config file + brave_api_key = loaded_config_data['search_engines']['brave_search_api_key'] + if not brave_api_key: + raise ValueError("Please provide a valid Brave Search API subscription key") + if not country: + brave_country = loaded_config_data['search_engines']['search_engine_country_code_brave'] + else: + country = "US" + if not search_lang: + search_lang = "en" + if not ui_lang: + ui_lang = "en" + if not result_count: + result_count = 10 + # if not date_range: + # date_range = "month" + if not result_filter: + result_filter = "webpages" + if search_type == "ai": + brave_api_key = loaded_config_data['search_engines']['brave_search_ai_api_key'] + else: + raise ValueError("Invalid search type. Please choose 'ai' or 'web'.") + + + headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": brave_api_key} + + # https://api.search.brave.com/app/documentation/web-search/query#WebSearchAPIQueryParameters + params = {"q": search_term, "textDecorations": True, "textFormat": "HTML", "count": result_count, + "freshness": date_range, "promote": "webpages", "safeSearch": "Moderate"} + + response = requests.get(search_url, headers=headers, params=params) + response.raise_for_status() + # Response: https://api.search.brave.com/app/documentation/web-search/responses#WebSearchApiResponse + brave_search_results = response.json() + return brave_search_results + + +def test_search_brave(): + search_term = "How can I bake a cherry cake" + country = "US" + search_lang = "en" + ui_lang = "en" + result_count = 10 + safesearch = "moderate" + date_range = None + result_filter = None + result = search_web_brave(search_term, country, search_lang, ui_lang, result_count, safesearch, date_range, + result_filter) + print("Brave Search Results:") + print(result) + + output_dict = {"results": []} + parse_brave_results(result, output_dict) + print("Parsed Brave Results:") + print(json.dumps(output_dict, indent=2)) + + +def parse_brave_results(raw_results: Dict, output_dict: Dict) -> None: + """ + Parse Brave search results and update the output dictionary + + Args: + raw_results (Dict): Raw Brave API response + output_dict (Dict): Dictionary to store processed results + """ + try: + # Initialize results list if not present + if "results" not in output_dict: + output_dict["results"] = [] + + # Extract query information + if "query" in raw_results: + query_info = raw_results["query"] + output_dict.update({ + "search_query": query_info.get("original", ""), + "content_country": query_info.get("country", ""), + "city": query_info.get("city", ""), + "state": query_info.get("state", ""), + "more_results_available": query_info.get("more_results_available", False) + }) + + # Process web results + if "web" in raw_results and "results" in raw_results["web"]: + for result in raw_results["web"]["results"]: + processed_result = { + "title": result.get("title", ""), + "url": result.get("url", ""), + "content": 
result.get("description", ""), + "metadata": { + "date_published": result.get("page_age", None), + "author": None, + "source": result.get("profile", {}).get("name", None), + "language": result.get("language", None), + "relevance_score": None, + "snippet": result.get("description", None), + "family_friendly": result.get("family_friendly", None), + "type": result.get("type", None), + "subtype": result.get("subtype", None), + "thumbnail": result.get("thumbnail", {}).get("src", None) + } + } + output_dict["results"].append(processed_result) + + # Update total results count + if "mixed" in raw_results: + output_dict["total_results_found"] = len(raw_results["mixed"].get("main", [])) + + # Set family friendly status + if "mixed" in raw_results: + output_dict["family_friendly"] = raw_results.get("family_friendly", True) + + except Exception as e: + logging.error(f"Error processing Brave results: {str(e)}") + output_dict["processing_error"] = f"Error processing Brave results: {str(e)}" + +def test_parse_brave_results(): + pass + + +######################### DuckDuckGo Search ######################### +# +# https://github.com/deedy5/duckduckgo_search +# Copied request format/structure from https://github.com/deedy5/duckduckgo_search/blob/main/duckduckgo_search/duckduckgo_search.py +def create_session() -> requests.Session: + session = requests.Session() + retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[429, 500, 502, 503, 504]) + session.mount('https://', HTTPAdapter(max_retries=retries)) + return session + +def search_web_duckduckgo( + keywords: str, + region: str = "wt-wt", + timelimit: str | None = None, + max_results: int | None = None, +) -> list[dict[str, str]]: + assert keywords, "keywords is mandatory" + + payload = { + "q": keywords, + "s": "0", + "o": "json", + "api": "d.js", + "vqd": "", + "kl": region, + "bing_market": region, + } + + def _normalize_url(url: str) -> str: + """Unquote URL and replace spaces with '+'.""" + return unquote(url).replace(" ", "+") if url else "" + + def _normalize(raw_html: str) -> str: + """Strip HTML tags from the raw_html string.""" + REGEX_STRIP_TAGS = re.compile("<.*?>") + return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else "" + + if timelimit: + payload["df"] = timelimit + + cache = set() + results: list[dict[str, str]] = [] + + for _ in range(5): + response = requests.post("https://html.duckduckgo.com/html", data=payload) + resp_content = response.content + if b"No results." 
in resp_content: + return results + + tree = document_fromstring(resp_content) + elements = tree.xpath("//div[h2]") + if not isinstance(elements, list): + return results + + for e in elements: + if isinstance(e, _Element): + hrefxpath = e.xpath("./a/@href") + href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None + if ( + href + and href not in cache + and not href.startswith( + ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain") + ) + ): + cache.add(href) + titlexpath = e.xpath("./h2/a/text()") + title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else "" + bodyxpath = e.xpath("./a//text()") + body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else "" + results.append( + { + "title": _normalize(title), + "href": _normalize_url(href), + "body": _normalize(body), + } + ) + if max_results and len(results) >= max_results: + return results + + npx = tree.xpath('.//div[@class="nav-link"]') + if not npx or not max_results: + return results + next_page = npx[-1] if isinstance(npx, list) else None + if isinstance(next_page, _Element): + names = next_page.xpath('.//input[@type="hidden"]/@name') + values = next_page.xpath('.//input[@type="hidden"]/@value') + if isinstance(names, list) and isinstance(values, list): + payload = {str(n): str(v) for n, v in zip(names, values)} + + return results + + +def test_search_duckduckgo(): + try: + results = search_web_duckduckgo( + keywords="How can I bake a cherry cake?", + region="us-en", + timelimit="w", + max_results=10 + ) + print(f"Number of results: {len(results)}") + for result in results: + print(f"Title: {result['title']}") + print(f"URL: {result['href']}") + print(f"Snippet: {result['body']}") + print("---") + + # Parse the results + output_dict = {"results": []} + parse_duckduckgo_results({"results": results}, output_dict) + print("Parsed DuckDuckGo Results:") + print(json.dumps(output_dict, indent=2)) + + except ValueError as e: + print(f"Invalid input: {str(e)}") + except requests.RequestException as e: + print(f"Request error: {str(e)}") + + +def parse_duckduckgo_results(raw_results: Dict, output_dict: Dict) -> None: + """ + Parse DuckDuckGo search results and update the output dictionary + + Args: + raw_results (Dict): Raw DuckDuckGo response + output_dict (Dict): Dictionary to store processed results + """ + try: + # Initialize results list if not present + if "results" not in output_dict: + output_dict["results"] = [] + + # DuckDuckGo results are in a list of dictionaries + results = raw_results.get("results", []) + + for result in results: + # Extract information directly from the dictionary + title = result.get("title", "") + url = result.get("href", "") + snippet = result.get("body", "") + + # Log warnings for missing data + if not title: + logging.warning("Missing title in result") + if not url: + logging.warning("Missing URL in result") + if not snippet: + logging.warning("Missing snippet in result") + + # Add the processed result to the output dictionary + processed_result = { + "title": title, + "url": url, + "content": snippet, + "metadata": { + "date_published": None, # DuckDuckGo doesn't typically provide this + "author": None, # DuckDuckGo doesn't typically provide this + "source": extract_domain(url) if url else None, + "language": None, # DuckDuckGo doesn't typically provide this + "relevance_score": None, # DuckDuckGo doesn't typically provide this + "snippet": snippet + } + } + + 
output_dict["results"].append(processed_result) + + # Update total results count + output_dict["total_results_found"] = len(output_dict["results"]) + + except Exception as e: + logging.error(f"Error processing DuckDuckGo results: {str(e)}") + output_dict["processing_error"] = f"Error processing DuckDuckGo results: {str(e)}" + + +def extract_domain(url: str) -> str: + """ + Extract domain name from URL + + Args: + url (str): Full URL + + Returns: + str: Domain name + """ + try: + from urllib.parse import urlparse + parsed_uri = urlparse(url) + domain = parsed_uri.netloc + return domain.replace('www.', '') + except: + return url + + +def test_parse_duckduckgo_results(): + pass + + + +######################### Google Search ######################### +# +# https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list +def search_web_google( + search_query: str, + google_search_api_key: Optional[str] = None, + google_search_engine_id: Optional[str] = None, + result_count: Optional[int] = None, + c2coff: Optional[str] = None, + results_origin_country: Optional[str] = None, + date_range: Optional[str] = None, + exactTerms: Optional[str] = None, + excludeTerms: Optional[str] = None, + filter: Optional[str] = None, + geolocation: Optional[str] = None, + ui_language: Optional[str] = None, + search_result_language: Optional[str] = None, + safesearch: Optional[str] = None, + site_blacklist: Optional[str] = None, + sort_results_by: Optional[str] = None +) -> Dict[str, Any]: + """ + Perform a Google web search with the given parameters. + + :param search_query: The search query string + :param google_search_api_key: Google Search API key + :param google_search_engine_id: Google Search Engine ID + :param result_count: Number of results to return + :param c2coff: Enable/disable traditional Chinese search + :param results_origin_country: Limit results to a specific country + :param date_range: Limit results to a specific date range + :param exactTerms: Exact terms that must appear in results + :param excludeTerms: Terms that must not appear in results + :param filter: Control duplicate content filter + :param geolocation: Geolocation of the user + :param ui_language: Language of the user interface + :param search_result_language: Language of search results + :param safesearch: Safe search setting + :param site_blacklist: Single Site to exclude from search + :param sort_results_by: Sorting criteria for results + :return: JSON response from Google Search API + """ + try: + # Load Search API URL from config file + search_url = loaded_config_data['search_engines']['google_search_api_url'] + logging.info(f"Using search URL: {search_url}") + + # Initialize params dictionary + params: Dict[str, Any] = {"q": search_query} + + # Handle c2coff + if c2coff is None: + c2coff = loaded_config_data['search_engines']['google_simp_trad_chinese'] + if c2coff is not None: + params["c2coff"] = c2coff + + # Handle results_origin_country + if results_origin_country is None: + limit_country_search = loaded_config_data['search_engines']['limit_google_search_to_country'] + if limit_country_search: + results_origin_country = loaded_config_data['search_engines']['google_search_country'] + if results_origin_country: + params["cr"] = results_origin_country + + # Handle google_search_engine_id + if google_search_engine_id is None: + google_search_engine_id = loaded_config_data['search_engines']['google_search_engine_id'] + if not google_search_engine_id: + raise ValueError("Please set a valid Google Search Engine ID in the 
config file") + params["cx"] = google_search_engine_id + + # Handle google_search_api_key + if google_search_api_key is None: + google_search_api_key = loaded_config_data['search_engines']['google_search_api_key'] + if not google_search_api_key: + raise ValueError("Please provide a valid Google Search API subscription key") + params["key"] = google_search_api_key + + # Handle other parameters + if result_count: + params["num"] = result_count + if date_range: + params["dateRestrict"] = date_range + if exactTerms: + params["exactTerms"] = exactTerms + if excludeTerms: + params["excludeTerms"] = excludeTerms + if filter: + params["filter"] = filter + if geolocation: + params["gl"] = geolocation + if ui_language: + params["hl"] = ui_language + if search_result_language: + params["lr"] = search_result_language + if safesearch is None: + safesearch = loaded_config_data['search_engines']['google_safe_search'] + if safesearch: + params["safe"] = safesearch + if sort_results_by: + params["sort"] = sort_results_by + + logging.info(f"Prepared parameters for Google Search: {params}") + + # Make the API call + response = requests.get(search_url, params=params) + response.raise_for_status() + google_search_results = response.json() + + logging.info(f"Successfully retrieved search results. Items found: {len(google_search_results.get('items', []))}") + + return google_search_results + + except ValueError as ve: + logging.error(f"Configuration error: {str(ve)}") + raise + + except RequestException as re: + logging.error(f"Error during API request: {str(re)}") + raise + + except Exception as e: + logging.error(f"Unexpected error occurred: {str(e)}") + raise + + +def test_search_google(): + search_query = "How can I bake a cherry cake?" + google_search_api_key = loaded_config_data['search_engines']['google_search_api_key'] + google_search_engine_id = loaded_config_data['search_engines']['google_search_engine_id'] + result_count = 10 + c2coff = "1" + results_origin_country = "countryUS" + date_range = None + exactTerms = None + excludeTerms = None + filter = None + geolocation = "us" + ui_language = "en" + search_result_language = "lang_en" + safesearch = "off" + site_blacklist = None + sort_results_by = None + result = search_web_google(search_query, + google_search_api_key, + google_search_engine_id, + result_count, + c2coff, + results_origin_country, + date_range, + exactTerms, + excludeTerms, + filter, + geolocation, + ui_language, + search_result_language, + safesearch, + site_blacklist, + sort_results_by + ) + print(result) + return result + + +def parse_google_results(raw_results: Dict, output_dict: Dict) -> None: + """ + Parse Google Custom Search API results and update the output dictionary. + + Args: + raw_results (Dict): Raw Google API response. + output_dict (Dict): Dictionary to store processed results. 
+ """ + logging.info(f"Raw results received: {json.dumps(raw_results, indent=2)}") + # For debugging only FIXME + logging.debug("Raw web_search_results from Google:") + logging.debug(json.dumps(raw_results, indent=2)) + try: + # Initialize results list if not present + if "results" not in output_dict: + output_dict["results"] = [] + + # Extract search information + if "searchInformation" in raw_results: + search_info = raw_results["searchInformation"] + output_dict["total_results_found"] = int(search_info.get("totalResults", "0")) + output_dict["search_time"] = float(search_info.get("searchTime", 0.0)) + + # Extract spelling suggestions + if "spelling" in raw_results: + output_dict["spell_suggestions"] = raw_results["spelling"].get("correctedQuery") + + # Extract search parameters from queries + if "queries" in raw_results and "request" in raw_results["queries"]: + request = raw_results["queries"]["request"][0] + output_dict.update({ + "search_query": request.get("searchTerms", ""), + "search_lang": request.get("language", ""), + "result_count": request.get("count", 0), + "safesearch": request.get("safe", None), + "exactTerms": request.get("exactTerms", None), + "excludeTerms": request.get("excludeTerms", None), + "filter": request.get("filter", None), + "geolocation": request.get("gl", None), + "search_result_language": request.get("hl", None), + "sort_results_by": request.get("sort", None) + }) + + # Process search results + if "items" in raw_results: + for item in raw_results["items"]: + processed_result = { + "title": item.get("title", ""), + "url": item.get("link", ""), + # IMPORTANT: 'snippet' is used as 'content' + "content": item.get("snippet", ""), + "metadata": { + "date_published": item.get("pagemap", {}) + .get("metatags", [{}])[0] + .get("article:published_time"), + "author": item.get("pagemap", {}) + .get("metatags", [{}])[0] + .get("article:author"), + "source": item.get("displayLink", None), + "language": item.get("language", None), + "relevance_score": None, # Google doesn't provide this directly + "snippet": item.get("snippet", None), + "file_format": item.get("fileFormat", None), + "mime_type": item.get("mime", None), + "cache_url": item.get("cacheId", None) + } + } + + # Extract additional metadata if available + if "pagemap" in item: + pagemap = item["pagemap"] + if "metatags" in pagemap and pagemap["metatags"]: + metatags = pagemap["metatags"][0] + processed_result["metadata"].update({ + "description": metatags.get("og:description", + metatags.get("description")), + "keywords": metatags.get("keywords"), + "site_name": metatags.get("og:site_name") + }) + + output_dict["results"].append(processed_result) + + # Add pagination information + output_dict["pagination"] = { + "has_next": "nextPage" in raw_results.get("queries", {}), + "has_previous": "previousPage" in raw_results.get("queries", {}), + "current_page": raw_results.get("queries", {}) + .get("request", [{}])[0] + .get("startIndex", 1) + } + + except Exception as e: + logging.error(f"Error processing Google results: {str(e)}") + output_dict["processing_error"] = f"Error processing Google results: {str(e)}" + + +def test_parse_google_results(): + parsed_results = {} + raw_results = {} + raw_results = test_search_google() + parse_google_results(raw_results, parsed_results) + print(f"Parsed search results: {parsed_results}") + pass + + + +######################### Kagi Search ######################### +# +# https://help.kagi.com/kagi/api/search.html +def search_web_kagi(query: str, limit: int = 10) -> Dict: + 
search_url = "https://kagi.com/api/v0/search" + + # load key from config file + kagi_api_key = loaded_config_data['search_engines']['kagi_search_api_key'] + if not kagi_api_key: + raise ValueError("Please provide a valid Kagi Search API subscription key") + + """ + Queries the Kagi Search API with the given query and limit. + """ + if kagi_api_key is None: + raise ValueError("API key is required.") + + headers = {"Authorization": f"Bot {kagi_api_key}"} + endpoint = f"{search_url}/search" + params = {"q": query, "limit": limit} + + response = requests.get(endpoint, headers=headers, params=params) + response.raise_for_status() + logging.debug(response.json()) + return response.json() + + +def test_search_kagi(): + search_term = "How can I bake a cherry cake" + result_count = 10 + result = search_web_kagi(search_term, result_count) + print(result) + + +def parse_kagi_results(raw_results: Dict, output_dict: Dict) -> None: + """ + Parse Kagi search results and update the output dictionary + + Args: + raw_results (Dict): Raw Kagi API response + output_dict (Dict): Dictionary to store processed results + """ + try: + # Extract metadata + if "meta" in raw_results: + meta = raw_results["meta"] + output_dict["search_time"] = meta.get("ms", 0) / 1000.0 # Convert to seconds + output_dict["api_balance"] = meta.get("api_balance") + output_dict["search_id"] = meta.get("id") + output_dict["node"] = meta.get("node") + + # Process search results + if "data" in raw_results: + for item in raw_results["data"]: + # Skip related searches (type 1) + if item.get("t") == 1: + output_dict["related_searches"] = item.get("list", []) + continue + + # Process regular search results (type 0) + if item.get("t") == 0: + processed_result = { + "title": item.get("title", ""), + "url": item.get("url", ""), + "content": item.get("snippet", ""), + "metadata": { + "date_published": item.get("published"), + "author": None, # Kagi doesn't typically provide this + "source": None, # Could be extracted from URL if needed + "language": None, # Kagi doesn't typically provide this + "relevance_score": None, + "snippet": item.get("snippet"), + "thumbnail": item.get("thumbnail", {}).get("url") if "thumbnail" in item else None + } + } + output_dict["results"].append(processed_result) + + # Update total results count + output_dict["total_results_found"] = len([ + item for item in raw_results["data"] + if item.get("t") == 0 + ]) + + except Exception as e: + output_dict["processing_error"] = f"Error processing Kagi results: {str(e)}" + + +def test_parse_kagi_results(): + pass + + + +######################### SearX Search ######################### +# +# https://searx.space +# https://searx.github.io/searx/dev/search_api.html +def searx_create_session() -> requests.Session: + """ + Create a requests session with retry logic. + """ + session = requests.Session() + retries = Retry( + total=3, # Maximum number of retries + backoff_factor=1, # Exponential backoff factor + status_forcelist=[429, 500, 502, 503, 504], # Retry on these status codes + allowed_methods=["GET"] # Only retry on GET requests + ) + adapter = HTTPAdapter(max_retries=retries) + session.mount("http://", adapter) + session.mount("https://", adapter) + return session + +def search_web_searx(search_query, language='auto', time_range='', safesearch=0, pageno=1, categories='general', searx_url=None): + """ + Perform a search using a Searx instance. + + Args: + search_query (str): The search query. + language (str): Language for the search results. 
+ time_range (str): Time range for the search results. + safesearch (int): Safe search level (0=off, 1=moderate, 2=strict). + pageno (int): Page number of the results. + categories (str): Categories to search in (e.g., 'general', 'news'). + searx_url (str): Custom Searx instance URL (optional). + + Returns: + str: JSON string containing the search results or an error message. + """ + # Use the provided Searx URL or fall back to the configured one + if not searx_url: + searx_url = loaded_config_data['search_engines']['searx_search_api_url'] + if not searx_url: + return json.dumps({"error": "SearX Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet."}) + + # Validate and construct URL + try: + parsed_url = urlparse(searx_url) + params = { + 'q': search_query, + 'language': language, + 'time_range': time_range, + 'safesearch': safesearch, + 'pageno': pageno, + 'categories': categories + } + search_url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(params)}" + logging.info(f"Search URL: {search_url}") + except Exception as e: + return json.dumps({"error": f"Invalid URL configuration: {str(e)}"}) + + # Perform the search request + try: + # Mimic browser headers + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Referer': 'https://www.google.com/', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1' + } + + # Add a random delay to mimic human behavior + delay = random.uniform(2, 5) # Random delay between 2 and 5 seconds + time.sleep(delay) + + session = searx_create_session() + response = session.get(search_url, headers=headers) + response.raise_for_status() + + # Check if the response is JSON + content_type = response.headers.get('Content-Type', '') + if 'application/json' in content_type: + search_data = response.json() + else: + # If not JSON, assume it's HTML and parse it + from bs4 import BeautifulSoup + soup = BeautifulSoup(response.text, 'html.parser') + search_data = parse_html_search_results_generic(soup) + + # Process results + data = [] + for result in search_data: + data.append({ + 'title': result.get('title'), + 'link': result.get('url'), + 'snippet': result.get('content'), + 'publishedDate': result.get('publishedDate') + }) + + if not data: + return json.dumps({"error": "No information was found online for the search query."}) + + return json.dumps(data) + + except requests.exceptions.RequestException as e: + logging.error(f"Error searching for content: {str(e)}") + return json.dumps({"error": f"There was an error searching for content. 
{str(e)}"}) + +def test_search_searx(): + # Use a different Searx instance to avoid rate limiting + searx_url = "https://searx.be" # Example of a different Searx instance + result = search_web_searx("What goes into making a cherry cake?", searx_url=searx_url) + print(result) + +def parse_searx_results(searx_search_results, web_search_results_dict): + pass + +def test_parse_searx_results(): + pass + + + + +######################### Serper.dev Search ######################### +# +# https://github.com/YassKhazzan/openperplex_backend_os/blob/main/sources_searcher.py +def search_web_serper(): + pass + + +def test_search_serper(): + pass + +def parse_serper_results(serper_search_results, web_search_results_dict): + pass + + + + +######################### Tavily Search ######################### +# +# https://github.com/YassKhazzan/openperplex_backend_os/blob/main/sources_searcher.py +def search_web_tavily(search_query, result_count=10, site_whitelist=None, site_blacklist=None): + # Check if API URL is configured + tavily_api_url = "https://api.tavily.com/search" + + tavily_api_key = loaded_config_data['search_engines']['tavily_search_api_key'] + + # Prepare the request payload + payload = { + "api_key": tavily_api_key, + "query": search_query, + "max_results": result_count + } + + # Add optional parameters if provided + if site_whitelist: + payload["include_domains"] = site_whitelist + if site_blacklist: + payload["exclude_domains"] = site_blacklist + + # Perform the search request + try: + headers = { + 'Content-Type': 'application/json', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0' + } + + response = requests.post(tavily_api_url, headers=headers, data=json.dumps(payload)) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + return f"There was an error searching for content. {str(e)}" + + +def test_search_tavily(): + result = search_web_tavily("How can I bake a cherry cake?") + print(result) + + +def parse_tavily_results(tavily_search_results, web_search_results_dict): + pass + + +def test_parse_tavily_results(): + pass + + + + +######################### Yandex Search ######################### +# +# https://yandex.cloud/en/docs/search-api/operations/web-search +# https://yandex.cloud/en/docs/search-api/quickstart/ +# https://yandex.cloud/en/docs/search-api/concepts/response +# https://github.com/yandex-cloud/cloudapi/blob/master/yandex/cloud/searchapi/v2/search_query.proto +def search_web_yandex(): + pass + + +def test_search_yandex(): + pass + +def parse_yandex_results(yandex_search_results, web_search_results_dict): + pass + + +# +# End of WebSearch_APIs.py +####################################################################################################################### diff --git a/Helper_Scripts/Workflows/Workflows.json b/App_Function_Libraries/Workflows/Workflows.json similarity index 81% rename from Helper_Scripts/Workflows/Workflows.json rename to App_Function_Libraries/Workflows/Workflows.json index e10f8436f..d82b4942b 100644 --- a/Helper_Scripts/Workflows/Workflows.json +++ b/App_Function_Libraries/Workflows/Workflows.json @@ -11,22 +11,6 @@ "Why did this final event happen?" ] }, - { - "name": "Summarization", - "context": "A process of condensing information into a concise form while retaining the main points. Start with the text to be summarized and provide a brief summary. 
Subsequent messages should refine the summary to make it more concise and focused.", - "prompts": [ - "Summarize the following text:", - "Refine the summary to be more concise:" - ] - }, - { - "name": "Question Answering", - "context": "A task that involves answering questions based on a given context or information. Start with the question and provide an answer based on the context. Subsequent messages should provide more detailed answers or explanations.", - "prompts": [ - "Answer the following question based on the context:", - "Provide a more detailed answer:" - ] - }, { "name": "Brainstorming", "context": "A creative technique for generating ideas and solutions through group discussion. Start with a topic or problem statement and generate initial ideas. Subsequent messages should expand on the ideas, refine them, and summarize the final set of ideas.", @@ -47,37 +31,28 @@ ] }, { - "name": "Product Description", - "context": "A detailed description of a product, highlighting its features, benefits, and value proposition. Start with the product details and write an initial description. Subsequent messages should enhance the description, simplify it, and optimize it for search engines.", + "name": "Concept Mastery", + "context": "A process for understanding a topic deeply and comprehensively. Start by identifying the core concepts, then explore subtopics, examples, and applications. Subsequent steps involve clarifying difficult areas and applying knowledge in practice scenarios.", "prompts": [ - "Write an initial product description based on the provided details:", - "Enhance the description with key features and benefits:", - "Simplify and make the description more engaging:", - "Optimize the description for SEO:" + "Identify the main concepts or themes of the topic:", + "Explore the subtopics and provide examples for each:", + "Clarify any areas or concepts you find difficult to understand:", + "Develop a practical application or scenario using the knowledge:", + "Review and summarize the key points of the topic:" ] }, { - "name": "SWOT Analysis", - "context": "A strategic planning tool used to identify the strengths, weaknesses, opportunities, and threats of a subject. Start with the subject of analysis and identify its strengths. Subsequent messages should cover weaknesses, opportunities, threats, and summarize the analysis.", + "name": "Conflict Resolution", + "context": "A process of addressing and resolving conflicts between individuals or groups. Start with identifying the conflict and parties involved, then explore solutions or compromises. Subsequent messages should plan a mediation session, evaluate outcomes, and ensure satisfaction.", "prompts": [ - "Identify the strengths of the subject:", - "Identify the weaknesses of the subject:", - "Identify potential opportunities for the subject:", - "Identify potential threats to the subject:", - "Summarize the SWOT analysis:" + "Describe the conflict and the parties involved:", + "Identify the underlying causes of the conflict:", + "Explore potential solutions or compromises:", + "Plan a mediation or discussion session to address the conflict:", + "Evaluate the outcomes and ensure all parties are satisfied:" ] }, { - "name": "User Story Creation", - "context": "A technique used in agile software development to capture user requirements from a user's perspective. Start with the user role and a brief description of the feature. 
Subsequent messages should detail user needs, acceptance criteria, and prioritize the user story.", - "prompts": [ - "Draft an initial user story for the feature:", - "Clarify the user needs and expected outcomes:", - "Refine the user story to ensure clarity and completeness:", - "Review and prioritize the user story for development:" - ] - }, - { "name": "Content Outline Creation", "context": "A structured plan that outlines the main topics and subtopics of a piece of content. Start with the content type and purpose, then list the main topics. Subsequent messages should detail subtopics, key points, and refine the outline for logical flow.", "prompts": [ @@ -88,13 +63,14 @@ ] }, { - "name": "Risk Assessment", - "context": "An evaluation of potential risks associated with a project or decision. Start with the project or decision and identify risks. Subsequent messages should analyze impact, likelihood, suggest mitigation strategies, and summarize the assessment.", + "name": "Creative Writing", + "context": "A process of writing fiction or poetry that involves generating ideas, drafting, revising, and editing. Start with a theme or prompt and brainstorm ideas. Subsequent messages should outline the plot, write a draft, refine characters or imagery, and polish the final version.", "prompts": [ - "Identify potential risks associated with the project/decision:", - "Analyze the impact and likelihood of each risk:", - "Suggest mitigation strategies for high-priority risks:", - "Summarize the risk assessment and proposed actions:" + "Generate ideas for a story or poem based on the following theme:", + "Outline the main plot or structure of the story:", + "Write the first draft of the opening scene or stanza:", + "Refine the draft by focusing on character development or imagery:", + "Edit and polish the final version for clarity and impact:" ] }, { @@ -117,6 +93,39 @@ "Make a recommendation based on the evaluation:" ] }, + { + "name": "Event Planning", + "context": "A process of organizing and coordinating an event, such as a conference, workshop, or party. Start with defining the purpose and goals of the event, then plan logistics and promotion. Subsequent messages should outline activities, develop a contingency plan, and ensure a successful event.", + "prompts": [ + "Outline the purpose and goals of the event:", + "List the key activities and sessions for the event:", + "Plan the logistics, including venue, catering, and AV setup:", + "Develop a promotional strategy to attract attendees:", + "Create a contingency plan for potential issues:" + ] + }, + { + "name": "Exam Preparation", + "context": "A systematic process to prepare effectively for an exam. Start by outlining the topics to cover, then assess strengths and weaknesses. Subsequent steps focus on targeted studying, practicing, and reviewing.", + "prompts": [ + "List all the topics or areas covered by the exam:", + "Identify your strengths and weaknesses in these topics:", + "Create a study plan prioritizing weaker areas and allocating time for review:", + "Practice with sample questions or problems in key areas:", + "Review your progress and refine your study approach as needed:" + ] + }, + { + "name": "Flaschards: Anki Flashcard Generation (Conceptual)", + "context": "A process to create meaningful Anki flashcards that help with conceptual understanding and recall. 
Start by identifying key concepts from the source material, then develop question-and-answer pairs that test understanding, refine them for clarity, and finalize the cards.", + "prompts": [ + "Identify the main concepts or topics from the source material:", + "Generate initial Q/A pairs that test understanding and recall of these concepts:", + "Refine the Q/A pairs for clarity, ensuring they are concise and easy to memorize:", + "Check the Q/A pairs for conceptual accuracy and adjust them if needed:", + "Finalize the Anki-ready flashcards for use:" + ] + }, { "name": "Goal Setting and Action Planning", "context": "A process of defining goals, breaking them down into actionable steps, and creating a plan for achievement. Start with the main goal and list steps. Subsequent messages should set timelines, identify resources, and review the plan.", @@ -129,28 +138,17 @@ ] }, { - "name": "User Experience (UX) Design", - "context": "A process of designing products or services that provide meaningful and relevant experiences to users. Start with user needs and pain points, then brainstorm solutions. Subsequent messages should evaluate solutions, create prototypes, and plan for user testing.", + "name": "Job Interview Preparation", + "context": "A process of researching, practicing, and preparing for a job interview to increase the chances of success. Start with researching the company and common interview questions. Subsequent messages should prepare answers, review the resume, and develop questions for the interviewer.", "prompts": [ - "Describe the user needs and pain points:", - "Brainstorm potential design solutions to address these needs:", - "Evaluate the feasibility and impact of each design solution:", - "Create a prototype or mockup based on the selected design:", - "Plan for user testing and feedback collection:" + "Research the company and summarize key information:", + "Identify the most common interview questions for this role:", + "Prepare answers to behavioral and situational questions:", + "Review your resume and highlight relevant experiences:", + "Develop questions to ask the interviewer:" ] }, { - "name": "Root Cause Analysis", - "context": "A methodical approach to identifying the underlying cause of a problem or issue. Start with the problem statement and list possible causes. Subsequent messages should use the '5 Whys' technique to drill down to the root cause and propose solutions.", - "prompts": [ - "Describe the problem or issue in detail:", - "Identify possible causes of the problem:", - "Use the '5 Whys' technique to drill down to the root cause:", - "Propose potential solutions to address the root cause:", - "Develop an action plan to implement the solution and monitor its effectiveness:" - ] - }, -{ "name": "Literature Review", "context": "A critical evaluation of existing research and literature on a specific topic. Start with the research question or topic and summarize key findings. Subsequent messages should identify gaps, compare methodologies, synthesize findings, and suggest future research directions.", "prompts": [ @@ -160,17 +158,6 @@ "Synthesize the findings into a cohesive narrative:" ] }, - { - "name": "Creative Writing", - "context": "A process of writing fiction or poetry that involves generating ideas, drafting, revising, and editing. Start with a theme or prompt and brainstorm ideas. 
Subsequent messages should outline the plot, write a draft, refine characters or imagery, and polish the final version.", - "prompts": [ - "Generate ideas for a story or poem based on the following theme:", - "Outline the main plot or structure of the story:", - "Write the first draft of the opening scene or stanza:", - "Refine the draft by focusing on character development or imagery:", - "Edit and polish the final version for clarity and impact:" - ] - }, { "name": "Marketing Campaign Planning", "context": "A strategic approach to promoting a product, service, or brand through coordinated marketing efforts. Start with defining the target audience and goals, then brainstorm creative ideas. Subsequent messages should outline channels, set timelines, assign responsibilities, and evaluate performance.", @@ -183,69 +170,86 @@ ] }, { - "name": "Event Planning", - "context": "A process of organizing and coordinating an event, such as a conference, workshop, or party. Start with defining the purpose and goals of the event, then plan logistics and promotion. Subsequent messages should outline activities, develop a contingency plan, and ensure a successful event.", + "name": "Personal Reflection and Growth", + "context": "A process of self-assessment, learning, and goal-setting to achieve personal growth and development. Start with reflecting on recent experiences and identifying key learnings. Subsequent messages should set personal goals, identify habits to change, develop an action plan, and review progress.", "prompts": [ - "Outline the purpose and goals of the event:", - "List the key activities and sessions for the event:", - "Plan the logistics, including venue, catering, and AV setup:", - "Develop a promotional strategy to attract attendees:", - "Create a contingency plan for potential issues:" + "Reflect on your recent experiences and identify key learnings:", + "Set personal goals based on your reflections:", + "Identify any habits or behaviors you want to change or improve:", + "Develop an action plan to work towards your goals:", + "Review your progress periodically and adjust your plan as needed:" ] }, { - "name": "Job Interview Preparation", - "context": "A process of researching, practicing, and preparing for a job interview to increase the chances of success. Start with researching the company and common interview questions. Subsequent messages should prepare answers, review the resume, and develop questions for the interviewer.", + "name": "Product Launch", + "context": "A strategic process of introducing a new product or service to the market. Start with defining the unique value proposition and target market, then plan development milestones and go-to-market strategy. 
Subsequent messages should outline the launch event, monitor performance, and gather feedback.", "prompts": [ - "Research the company and summarize key information:", - "Identify the most common interview questions for this role:", - "Prepare answers to behavioral and situational questions:", - "Review your resume and highlight relevant experiences:", - "Develop questions to ask the interviewer:" + "Define the product's unique value proposition and target market:", + "Outline the product development milestones and key deliverables:", + "Develop a go-to-market strategy, including pricing, distribution, and promotion:", + "Plan the launch event or activities:", + "Monitor the launch performance and gather customer feedback:" ] }, { - "name": "Conflict Resolution", - "context": "A process of addressing and resolving conflicts between individuals or groups. Start with identifying the conflict and parties involved, then explore solutions or compromises. Subsequent messages should plan a mediation session, evaluate outcomes, and ensure satisfaction.", + "name": "Product Description", + "context": "A detailed description of a product, highlighting its features, benefits, and value proposition. Start with the product details and write an initial description. Subsequent messages should enhance the description, simplify it, and optimize it for search engines.", "prompts": [ - "Describe the conflict and the parties involved:", - "Identify the underlying causes of the conflict:", - "Explore potential solutions or compromises:", - "Plan a mediation or discussion session to address the conflict:", - "Evaluate the outcomes and ensure all parties are satisfied:" + "Write an initial product description based on the provided details:", + "Enhance the description with key features and benefits:", + "Simplify and make the description more engaging:", + "Optimize the description for SEO:" ] }, { - "name": "Time Management", - "context": "A set of practices and tools to manage time effectively and prioritize tasks. Start with listing tasks and activities, then prioritize based on urgency and importance. Subsequent messages should create a daily schedule, identify time-wasters, and review progress.", + "name": "Question Answering", + "context": "A task that involves answering questions based on a given context or information. Start with the question and provide an answer based on the context. Subsequent messages should provide more detailed answers or explanations.", "prompts": [ - "List all the tasks and activities you need to complete this week:", - "Prioritize the tasks based on urgency and importance:", - "Create a daily schedule, allocating time blocks for each task:", - "Identify any potential time-wasters or distractions:", - "Review your progress at the end of the week and adjust your approach:" + "Answer the following question based on the context:", + "Provide a more detailed answer:" ] }, { - "name": "Product Launch", - "context": "A strategic process of introducing a new product or service to the market. Start with defining the unique value proposition and target market, then plan development milestones and go-to-market strategy. Subsequent messages should outline the launch event, monitor performance, and gather feedback.", + "name": "Quiz Prep: Comprehension-Focused", + "context": "A process to prepare for quizzes by thoroughly understanding the material and practicing application. 
Start by outlining quiz topics, then focus on comprehension, application, and final review.", "prompts": [ - "Define the product's unique value proposition and target market:", - "Outline the product development milestones and key deliverables:", - "Develop a go-to-market strategy, including pricing, distribution, and promotion:", - "Plan the launch event or activities:", - "Monitor the launch performance and gather customer feedback:" + "List the topics or concepts that the quiz will cover:", + "Summarize the main ideas or explanations for each topic in your own words:", + "Practice applying these ideas to examples, questions, or problems:", + "Identify any concepts that are unclear and revisit them for clarification:", + "Review and test your understanding with practice questions:" ] }, { - "name": "Personal Reflection and Growth", - "context": "A process of self-assessment, learning, and goal-setting to achieve personal growth and development. Start with reflecting on recent experiences and identifying key learnings. Subsequent messages should set personal goals, identify habits to change, develop an action plan, and review progress.", + "name": "Quiz Prep: Rapid Recall", + "context": "A process for efficiently preparing for short quizzes by focusing on key facts, reviewing, and testing recall. Start by identifying quiz topics, then focus on memorizing key points, practicing recall, and assessing readiness.", "prompts": [ - "Reflect on your recent experiences and identify key learnings:", - "Set personal goals based on your reflections:", - "Identify any habits or behaviors you want to change or improve:", - "Develop an action plan to work towards your goals:", - "Review your progress periodically and adjust your plan as needed:" + "List the main topics or concepts the quiz will cover:", + "Identify the key facts, definitions, or formulas for each topic:", + "Create flashcards or a memory aid to reinforce these key points:", + "Practice recalling the information without looking at notes:", + "Take a short self-quiz to evaluate your readiness and fill any gaps:" + ] + }, + { + "name": "Risk Assessment", + "context": "An evaluation of potential risks associated with a project or decision. Start with the project or decision and identify risks. Subsequent messages should analyze impact, likelihood, suggest mitigation strategies, and summarize the assessment.", + "prompts": [ + "Identify potential risks associated with the project/decision:", + "Analyze the impact and likelihood of each risk:", + "Suggest mitigation strategies for high-priority risks:", + "Summarize the risk assessment and proposed actions:" + ] + }, + { + "name": "Root Cause Analysis", + "context": "A methodical approach to identifying the underlying cause of a problem or issue. Start with the problem statement and list possible causes. 
Subsequent messages should use the '5 Whys' technique to drill down to the root cause and propose solutions.", + "prompts": [ + "Describe the problem or issue in detail:", + "Identify possible causes of the problem:", + "Use the '5 Whys' technique to drill down to the root cause:", + "Propose potential solutions to address the root cause:", + "Develop an action plan to implement the solution and monitor its effectiveness:" ] }, { @@ -258,5 +262,56 @@ "Refine the pitch by addressing potential objections:", "Practice delivering the pitch with confidence and clarity:" ] + }, + { + "name": "Summarization", + "context": "A process of condensing information into a concise form while retaining the main points. Start with the text to be summarized and provide a brief summary. Subsequent messages should refine the summary to make it more concise and focused.", + "prompts": [ + "Summarize the following text:", + "Refine the summary to be more concise:" + ] + }, + { + "name": "SWOT Analysis", + "context": "A strategic planning tool used to identify the strengths, weaknesses, opportunities, and threats of a subject. Start with the subject of analysis and identify its strengths. Subsequent messages should cover weaknesses, opportunities, threats, and summarize the analysis.", + "prompts": [ + "Identify the strengths of the subject:", + "Identify the weaknesses of the subject:", + "Identify potential opportunities for the subject:", + "Identify potential threats to the subject:", + "Summarize the SWOT analysis:" + ] + }, + { + "name": "Time Management", + "context": "A set of practices and tools to manage time effectively and prioritize tasks. Start with listing tasks and activities, then prioritize based on urgency and importance. Subsequent messages should create a daily schedule, identify time-wasters, and review progress.", + "prompts": [ + "List all the tasks and activities you need to complete this week:", + "Prioritize the tasks based on urgency and importance:", + "Create a daily schedule, allocating time blocks for each task:", + "Identify any potential time-wasters or distractions:", + "Review your progress at the end of the week and adjust your approach:" + ] + }, + { + "name": "User Story Creation", + "context": "A technique used in agile software development to capture user requirements from a user's perspective. Start with the user role and a brief description of the feature. Subsequent messages should detail user needs, acceptance criteria, and prioritize the user story.", + "prompts": [ + "Draft an initial user story for the feature:", + "Clarify the user needs and expected outcomes:", + "Refine the user story to ensure clarity and completeness:", + "Review and prioritize the user story for development:" + ] + }, + { + "name": "User Experience (UX) Design", + "context": "A process of designing products or services that provide meaningful and relevant experiences to users. Start with user needs and pain points, then brainstorm solutions. 
Subsequent messages should evaluate solutions, create prototypes, and plan for user testing.", + "prompts": [ + "Describe the user needs and pain points:", + "Brainstorm potential design solutions to address these needs:", + "Evaluate the feasibility and impact of each design solution:", + "Create a prototype or mockup based on the selected design:", + "Plan for user testing and feedback collection:" + ] } ] diff --git a/App_Function_Libraries/Workflows/Workflows.py b/App_Function_Libraries/Workflows/Workflows.py new file mode 100644 index 000000000..97d50adb4 --- /dev/null +++ b/App_Function_Libraries/Workflows/Workflows.py @@ -0,0 +1,193 @@ +# Workflows.py +# +######################################### +# Workflow Library +# This library is used to facilitate chained prompt workflows +# +#### +#################### +# Function Categories +# +# Fixme +# +# +#################### +# Function List +# +# 1. FIXME +# +#################### +# +# Import necessary libraries +import json +import logging +from pathlib import Path +from typing import List, Dict, Tuple, Optional +# +# Non-Local Imports +# +# Local Imports +from App_Function_Libraries.Chat.Chat_Functions import chat +from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram +# +####################################################################################################################### +# +# Function Definitions + +# Load workflows from a JSON file +json_path = Path('./App_Function_Libraries/Workflows/Workflows.json') + +# Load workflows from a JSON file +def load_workflows(json_path: str = './App_Function_Libraries/Workflows/Workflows.json') -> List[Dict]: + with Path(json_path).open('r') as f: + return json.load(f) + +# Initialize a workflow +def initialize_workflow(workflow_name: str, workflows: List[Dict]) -> Tuple[Dict, str, List[Tuple[Optional[str], str]]]: + selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None) + if selected_workflow: + num_prompts = len(selected_workflow['prompts']) + context = selected_workflow.get('context', '') + first_prompt = selected_workflow['prompts'][0] + initial_chat = [(None, f"{first_prompt}")] + workflow_state = {"current_step": 0, "max_steps": num_prompts, "conversation_id": None} + logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps") + return workflow_state, context, initial_chat + else: + logging.error(f"Selected workflow not found: {workflow_name}") + return {"current_step": 0, "max_steps": 0, "conversation_id": None}, "", [] + + +# Process a workflow step +def process_workflow_step( + message: str, + history: List[Tuple[Optional[str], str]], + context: str, + workflow_name: str, + workflows: List[Dict], + workflow_state: Dict, + api_endpoint: Optional[str] = None, + api_key: Optional[str] = None, + save_conv: bool = False, + temp: float = 0.7, + system_message: Optional[str] = None, + media_content: Dict = {}, + selected_parts: List[str] = [] +) -> Tuple[List[Tuple[Optional[str], str]], Dict, bool]: + logging.info(f"Process workflow step called with message: {message}") + logging.info(f"Current workflow state: {workflow_state}") + + try: + selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None) + if not selected_workflow: + logging.error(f"Selected workflow not found: {workflow_name}") + return history, workflow_state, True + + current_step = workflow_state["current_step"] + max_steps = workflow_state["max_steps"] + + logging.info(f"Current step: {current_step}, Max 
steps: {max_steps}") + + if current_step >= max_steps: + logging.info("Workflow completed") + return history, workflow_state, False + + prompt = selected_workflow['prompts'][current_step] + full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}" + + logging.info(f"Preparing to process message: {full_message[:100]}...") + + # Use the existing chat function + bot_message = chat( + full_message, history, media_content, selected_parts, + api_endpoint, api_key, prompt, temp, system_message + ) + + logging.info(f"Received bot_message: {bot_message[:100]}...") + + new_history = history + [(message, bot_message)] + next_step = current_step + 1 + new_workflow_state = { + "current_step": next_step, + "max_steps": max_steps, + "conversation_id": workflow_state["conversation_id"] + } + + if next_step >= max_steps: + logging.info("Workflow completed after this step") + return new_history, new_workflow_state, False + else: + next_prompt = selected_workflow['prompts'][next_step] + new_history.append((None, f"Step {next_step + 1}: {next_prompt}")) + logging.info(f"Moving to next step: {next_step}") + return new_history, new_workflow_state, True + + except Exception as e: + logging.error(f"Error in process_workflow_step: {str(e)}") + return history, workflow_state, True + + +# Main function to run a workflow +def run_workflow( + workflow_name: str, + initial_context: str = "", + api_endpoint: Optional[str] = None, + api_key: Optional[str] = None, + save_conv: bool = False, + temp: float = 0.7, + system_message: Optional[str] = None, + media_content: Dict = {}, + selected_parts: List[str] = [] +) -> List[Tuple[Optional[str], str]]: + workflows = load_workflows() + workflow_state, context, history = initialize_workflow(workflow_name, workflows) + + # Combine the initial_context with the workflow's context + combined_context = f"{initial_context}\n\n{context}".strip() + + while True: + user_input = input("Your input (or 'quit' to exit): ") + if user_input.lower() == 'quit': + break + + history, workflow_state, continue_workflow = process_workflow_step( + user_input, history, combined_context, workflow_name, workflows, workflow_state, + api_endpoint, api_key, save_conv, temp, system_message, media_content, selected_parts + ) + + for _, message in history[-2:]: # Print the last two messages (user input and bot response) + print(message) + + if not continue_workflow: + print("Workflow completed.") + break + + return history + +# Example usage +# if __name__ == "__main__": +# workflow_name = "Example Workflow" +# initial_context = "This is an example context." 
+# +# final_history = run_workflow( +# workflow_name, +# initial_context, +# api_endpoint="your_api_endpoint", +# api_key="your_api_key", +# save_conv=True, +# temp=0.7, +# system_message="You are a helpful assistant guiding through a workflow.", +# media_content={}, +# selected_parts=[] +# ) +# +# print("Final conversation history:") +# for user_message, bot_message in final_history: +# if user_message: +# print(f"User: {user_message}") +# print(f"Bot: {bot_message}") +# print() + +# +# End of Workflows.py +####################################################################################################################### diff --git a/App_Function_Libraries/Workflows/__init__.py b/App_Function_Libraries/Workflows/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6646428be..2f7d92bf6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,7 @@ Contributing ============ -Contributions are welcome, and they are greatly appreciated! Every -little bit helps, and credit will always be given. +Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. You can contribute in many ways: diff --git a/Config_Files/Backup_Config.txt b/Config_Files/Backup_Config.txt index 1ef21b91b..eb5b40e56 100644 --- a/Config_Files/Backup_Config.txt +++ b/Config_Files/Backup_Config.txt @@ -1,71 +1,251 @@ [API] anthropic_api_key = anthropic_model = claude-3-5-sonnet-20240620 +anthropic_streaming = True +anthropic_temperature = 0.7 +anthropic_top_p = 0.95 +anthropic_min_p = 0.05 +# cohere_api_key = cohere_model = command-r-plus +cohere_streaming = True +cohere_temperature = 0.7 +# groq_api_key = groq_model = llama3-70b-8192 +groq_streaming = True +groq_temperature = 0.7 +# openai_api_key = openai_model = gpt-4o +openai_streaming = False +openai_temperature = 0.7 +openai_top_p = 0.95 +# huggingface_api_key = huggingface_model = meta-llama/Llama-3.1-70B-Instruct +huggingface_streaming = True +huggingface_temperature = 0.7 +# openrouter_api_key = openrouter_model = mistralai/mistral-7b-instruct:free +# deepseek_api_key = deepseek_model = deepseek-chat +deepseek_streaming = True +deepseek_temperature = 0.7 +# mistral_api_key = mistral_model = mistral-large-latest +mistral_streaming = True +mistral_temperature = 0.7 +# google_api_key = +# Available Model Options: google_model = gemini-1.5-pro +google_streaming = True +google_temperature = 0.7 +# +elevenlabs_api_key = +# custom_openai_api_key = custom_openai_api_ip = +custom_openai_api_streaming = True +custom_openai_api_temperature = 0.7 +custom_openai_api_top_p = 0.9 +custom_openai_api_min_p = 0.05 +# default_api = openai + [Local-API] kobold_api_IP = http://127.0.0.1:5001/api/v1/generate kobold_openai_api_IP = http://127.0.0.1:5001/v1/chat/completions kobold_api_key = +kobold_streaming = True +kobold_temperature = 0.7 +kobold_top_p = 0.9 +kobold_min_p = 0.05 +# llama_api_IP = http://127.0.0.1:8080/completion llama_api_key = +llama_streaming = True +llama_temperature = 0.7 +llama_top_p = 0.9 +llama_min_p = 0.05 +# ooba_api_key = ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions +ooba_streaming = True +ooba_temperature = 0.7 +ooba_top_p = 0.9 +ooba_min_p = 0.05 +# tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions tabby_api_key = +tabby_streaming = True +tabby_temperature = 0.7 +# vllm_api_IP = http://127.0.0.1:8000/v1/chat/completions vllm_model = +vllm_api_key = +vllm_streaming = True +vllm_temperature = 0.7 +# ollama_api_IP = 
http://127.0.0.1:11434/v1/chat/completions ollama_api_key = ollama_model = llama3 +ollama_streaming = True +# aphrodite_api_IP = http://127.0.0.1:8080/completion aphrodite_api_key = +aphrodite_streaming = True +aphrodite_temperature = 0.7 +aphrodite_top_p = 0.9 +aphrodite_min_p = 0.05 +aphrodite_model = +# max_tokens = 4096 +local_api_timeout = 90 +local_api_retries = 3 +local_api_retry_delay = 5 +streaming = True +temperature = 0.7 +top_p = 0.9 +min_p = 0.05 +# https://artefact2.github.io/llm-sampling/ [Processing] processing_choice = cuda [Settings] +# Rename to LLM_API_Settings chunk_duration = 30 words_per_second = 3 save_character_chats = False save_rag_chats = False -local_api_timeout = 90 -local_api_retries = 3 -local_api_retry_delay = 5 -streaming = True -temperature = 0.7 -top_p = 0.9 -min_p = 0.05 -# https://artefact2.github.io/llm-sampling/ [Auto-Save] save_character_chats = False save_rag_chats = False +[TTS-Settings] +# General TTS Settings +default_tts_provider = openai +default_tts_voice = shimmer +default_tts_speed = 1 +# +# OpenAI TTS Settings +# available voices are 'alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer' +default_openai_tts_voice = shimmer +default_openai_tts_speed = 1 +# available models are 'tts-1' or 'tts-1-hd' +default_openai_tts_model = tts-1-hd +default_openai_tts_output_format = mp3 +# +# ElevenLabs TTS Settings +default_eleven_tts_voice = pNInz6obpgDQGcFmaJgB +default_eleven_tts_model = +default_eleven_tts_language_code = +default_eleven_tts_voice_stability = +default_eleven_tts_voice_similiarity_boost = +default_eleven_tts_voice_style = +default_eleven_tts_voice_use_speaker_boost = +default_eleven_tts_voice_pronunciation_dictionary_locators_dict_id = +default_eleven_tts_voice_pronunciation_dictionary_locators_version_id = +default_eleven_tts_speed = 1 +# Output options: 'mp3_22050_32', 'mp3_44100_32', 'mp3_44100_64', 'mp3_44100_96', 'mp3_44100_128', 'mp3_44100_192', 'pcm_16000', 'pcm_22050', 'pcm_24000', 'pcm_44100', 'ulaw_8000' +default_eleven_tts_output_format = mp3_44100_128 +# Google TTS Settings +default_google_tts_model = FIXME +default_google_tts_voice = FIXME +default_google_tts_speed = 1 +# +# MS Edge TTS Settings +edge_tts_voice = FIXME +# +# GPT-Sovits +# +# AllTalk TTS Settings +default_alltalk_tts_speed = 1.0 +default_alltalk_tts_voice = alloy +default_alltalk_tts_model = alltalk +default_alltalk_tts_output_format = mp3 +alltalk_api_ip = http://127.0.0.1:7851/v1/audio/speech + +[Search-Engines] +# Search Defaults +search_provider_default = google +search_language_query = en +search_language_results = en +search_language_analysis = en +search_default_max_queries = 10 +search_enable_subquery = True +search_enable_subquery_count_max = 5 +search_result_rerank = True +search_result_max = 15 +search_result_max_per_query = 10 +search_result_blacklist = [] +search_result_display_type = list +search_result_display_metadata = False +search_result_save_to_db = True +# How you want the results to be written, think 'style' or voice +search_result_analysis_tone = +relevance_analysis_llm = openai +final_answer_llm = openai +#### Search Engines ##### +# Baidu +search_engine_api_key_baidu = 1e1b1b1b1b1b1b1b1 +# +# Bing +search_engine_api_url_bing = https://api.bing.microsoft.com/v7.0/search +search_engine_api_key_bing = +search_engine_country_code_bing = en +# +# Brave +search_engine_api_key_brave_regular = +search_engine_api_key_brave_ai = +search_engine_country_code_brave = US +# +# DuckDuckGo +# +# Google +search_engine_api_url_google = 
https://www.googleapis.com/customsearch/v1? +search_engine_api_key_google = +search_engine_id_google = +# 0 = Enable / 1 = Disabled +enable_traditional_chinese = 0 +# Restricts search results to documents originating in a particular country. +limit_google_search_to_country = False +google_search_country_code = US +google_filter_setting = 1 +google_user_geolocation = US +google_ui_language = en +google_limit_search_results_to_language = +google_default_search_results = +google_safe_search = "active" +google_enable_site_search = +google_site_search_include = +google_site_search_exclude = +# https://developers.google.com/custom-search/docs/structured_search#sort-by-attribute +google_sort_results_by = +# +# Kagi +search_engine_api_key_kagi = > +# SearX +search_engine_searx_api = https://search.rhscz.eu/ +# Serper +# Tavily +search_engine_api_key_tavily = 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 +# Yandex +search_engine_api_key_yandex = 1e1b1b1b1b1b1b1b1 +search_engine_id_yandex = 1e1b1b1b1b1b1b1b1 [Prompts] prompt_sample = "What is the meaning of life?" video_summarize_prompt = "Above is the transcript of a video. Please read through the transcript carefully. Identify the main topics that are discussed over the course of the transcript. Then, summarize the key points about each main topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, but should be much shorter than the full transcript. Please output your bullet point summary inside tags. Do not repeat yourself while writing the summary." + [Database] type = sqlite sqlite_path = Databases/media_summary.db @@ -110,7 +290,6 @@ max_bytes = backup_count = 5 #int(os.getenv("tldw_LOG_BACKUP_COUNT", 5)) - #[Comments] #OpenAI Models: # gpt-4o diff --git a/Docs/Bitnet_and_Mamba_Papers.md b/Docs/Bitnet_and_Mamba_Papers.md index 8c47e9288..93f0d4d22 100644 --- a/Docs/Bitnet_and_Mamba_Papers.md +++ b/Docs/Bitnet_and_Mamba_Papers.md @@ -1,11 +1,13 @@ # Bitnet and MAMBA Papers +Bamba +https://huggingface.co/blog/bamba ### Bitnet - Unsorted - https://arxiv.org/abs/2411.11843 - + https://github.com/KYuuto1006/DQT ### Mamba diff --git a/Docs/Design/3rd-Party.md b/Docs/Design/3rd-Party.md new file mode 100644 index 000000000..1834a4b42 --- /dev/null +++ b/Docs/Design/3rd-Party.md @@ -0,0 +1,4 @@ +# 3rd Party Stuff + + +https://github.com/cohere-ai/quick-start-connectors \ No newline at end of file diff --git a/Docs/Design/Character_Chat.md b/Docs/Design/Character_Chat.md index f66c43af9..34ce60544 100644 --- a/Docs/Design/Character_Chat.md +++ b/Docs/Design/Character_Chat.md @@ -3,3 +3,6 @@ # Link Dump: https://github.com/caspianmoon/memoripy +https://arxiv.org/abs/2407.03974 + + diff --git a/Docs/Design/Chunking.md b/Docs/Design/Chunking.md new file mode 100644 index 000000000..f9a4f05de --- /dev/null +++ b/Docs/Design/Chunking.md @@ -0,0 +1,25 @@ +# Chunking + +## Overview +- Chunking is the process of breaking down a document into smaller pieces, or "chunks". 
This is useful for a variety of reasons, such as: +- + + + + + + +### Types of Chunking + + +### Implementation in tldw +- + + + +### Link Dump: +https://gleen.ai/blog/agentic-chunking-enhancing-rag-answers-for-completeness-and-accuracy/ + + + + diff --git a/Docs/Design/Coding_Page.md b/Docs/Design/Coding_Page.md index d8a0775a0..a1c57e587 100644 --- a/Docs/Design/Coding_Page.md +++ b/Docs/Design/Coding_Page.md @@ -10,4 +10,17 @@ https://github.com/brandondocusen/CntxtPY https://github.com/cyclotruc/gitdigest https://github.com/simonw/files-to-prompt https://github.com/yamadashy/repomix/tree/main +https://github.com/chanhx/crabviz +https://github.com/abinthomasonline/repo2txt +https://github.com/charmandercha/ArchiDoc +https://pythontutor.com/c.html#mode=edit +https://pythontutor.com/articles/c-cpp-visualizer.html +https://gitingest.com/ +https://gitdiagram.com/ +https://www.ilograph.com/blog/posts/diagrams-ai-can-and-cannot-generate/#system-diagramming-with-ai +https://github.com/osanseviero/geminicoder + + + + diff --git a/Docs/Design/Creative_Writing.md b/Docs/Design/Creative_Writing.md new file mode 100644 index 000000000..34fb55daf --- /dev/null +++ b/Docs/Design/Creative_Writing.md @@ -0,0 +1,17 @@ +# Creative Writing + + + + + + +### Link Dump: +https://github.com/p-e-w/arrows +https://huggingface.co/jukofyork/creative-writing-control-vectors-v3.0 +https://github.com/FartyPants/StoryCrafter/tree/main +https://github.com/datacrystals/AIStoryWriter +https://perchance.org/ai-story-generator +https://perchance.org/tutorial#repeating_things +https://github.com/Cloud-Code-AI/AkiraDocs + + diff --git a/Docs/Design/DB_Design.md b/Docs/Design/DB_Design.md index 9d171a981..27d536ef5 100644 --- a/Docs/Design/DB_Design.md +++ b/Docs/Design/DB_Design.md @@ -7,6 +7,11 @@ - [Interesting/Relevant Later](#interesting-relevant-later) + +SQLite + https://highperformancesqlite.com/watch/dot-commands + https://www.youtube.com/watch?v=XP-h304N06I + Migrating to sqlite-vec https://www.youtube.com/live/xmdiwdom6Vk?t=1740s https://alexgarcia.xyz/blog/2024/sqlite-vec-metadata-release/index.html @@ -14,6 +19,8 @@ Migrating to sqlite-vec https://docs.google.com/document/d/1sJ_S2ggfFmtPJupxIO3C1EZAFuDMUfNYcAytissbFMs/edit?tab=t.0#heading=h.xyau1jyb6vyx https://github.com/Mozilla-Ocho/llamafile/pull/644 +https://briandouglas.ie/sqlite-defaults/ +https://phiresky.github.io/blog/2020/sqlite-performance-tuning/ https://kerkour.com/sqlite-for-servers https://wafris.org/blog/rearchitecting-for-sqlite General DB: diff --git a/Docs/Design/Diagram_Generation.md b/Docs/Design/Diagram_Generation.md new file mode 100644 index 000000000..e71f72ad3 --- /dev/null +++ b/Docs/Design/Diagram_Generation.md @@ -0,0 +1,11 @@ +# Diagram Generation + + +### Link Dump: +https://excalidraw.com/ +https://www.napkin.ai/ +https://github.com/southbridgeai/diagen + + + + diff --git a/Docs/Design/ETL_Pipeline.md b/Docs/Design/ETL_Pipeline.md index d3a976f14..10598d592 100644 --- a/Docs/Design/ETL_Pipeline.md +++ b/Docs/Design/ETL_Pipeline.md @@ -3,44 +3,46 @@ ## Introduction This page serves as documentation regarding the ETL pipelines within tldw and provides context/justification for the details of each. 
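The Chunking overview above leaves its "Types of Chunking" and "Implementation in tldw" sections empty, and the ETL sources listed below ultimately feed such chunks downstream. As a rough sketch only (a plain word-window splitter with overlap, not necessarily the strategy tldw actually implements), the idea looks like this:

```python
# Illustrative only -- not tldw's actual chunking implementation.
from typing import List

def chunk_text(text: str, max_words: int = 300, overlap: int = 30) -> List[str]:
    """Split text into overlapping word-window chunks for downstream embedding/RAG."""
    words = text.split()
    if not words:
        return []
    step = max(max_words - overlap, 1)
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + max_words]))
        if start + max_words >= len(words):
            break
    return chunks

if __name__ == "__main__":
    sample = "lorem ipsum " * 500                   # ~1000 words of dummy text
    pieces = chunk_text(sample)
    print(len(pieces), len(pieces[0].split()))      # 4 chunks, the first 300 words long
```

The overlap is what keeps a sentence that straddles a chunk boundary retrievable from at least one chunk.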
- +https://towardsdatascience.com/etl-pipelines-in-python-best-practices-and-techniques-0c148452cc68 ## ETL Pipelines ### Data Sources -- - **Audio** - - f + - faster_whisper + - pyaudio - **Ebooks (epub)** - - f + - ebooklib - **PDFs** - Docling - pymupdf4llm - **Plain Text(`.md`, `.txt`)** - - f -- **Podcasts** - - f + - stdlib +- **PowerPoint Presentations** - need to add + - docling - **Rich Text(`.rtf`, `.docx`)** - - f + - doc2txt + - pypandoc - **RSS Feeds**: - f - **Videos** - f - **Websites**: - - f + - playwright + - bs4 + - requests - **XML Files** - - f' - - - + - xml.etree.ElementTree +- **3rd-Party Services** + - Sharepoint + * https://llamahub.ai/l/readers/llama-index-readers-microsoft-sharepoint + * + +### Tools +https://github.com/ucbepic/docetl +https://ucbepic.github.io/docetl/concepts/optimization/ +### Links +https://arxiv.org/html/2410.21169 ### Link Dump: -https://arxiv.org/abs/2410.12189 -https://ucbepic.github.io/docetl/concepts/optimization/ -https://arxiv.org/abs/2410.21169 -https://towardsdatascience.com/etl-pipelines-in-python-best-practices-and-techniques-0c148452cc68 -https://arxiv.org/html/2410.21169v2 -https://github.com/whyhow-ai/knowledge-table -https://github.com/yobix-ai/extractous -https://llamahub.ai/l/readers/llama-index-readers-microsoft-sharepoint diff --git a/Docs/Design/Education.md b/Docs/Design/Education.md new file mode 100644 index 000000000..66ae8339a --- /dev/null +++ b/Docs/Design/Education.md @@ -0,0 +1,34 @@ +# Education + + +### Link Dump: +https://arxiv.org/abs/2412.02035 +https://github.com/andreamust/NEON-GPT + +https://arxiv.org/abs/2411.07407 +https://arxiv.org/abs/2412.16429 +https://huggingface.co/papers/2412.15443 +https://github.com/thiswillbeyourgithub/AnkiAIUtils +https://news.ycombinator.com/item?id=42534931 +https://ankiweb.net/shared/info/1531888719 +https://bbycroft.net/llm +https://github.com/met4citizen/TalkingHead + + + +one2manny + — +Today at 12:43 AM +A great way to make studying more efficient and convenient is to take a digital PDF textbook, split it into separate files for each chapter, and organize them individually. +I then create a dedicated notebook for each chapter, treating it as a focused single source. +From there, I convert each chapter into an audio format, like a podcast. +This approach makes it easy to study while commuting, relaxing in bed with your eyes closed, or at any time when reading isn’t practical. + +I also recommend creating a study guide for each chapter, fully breaking down key concepts and definitions. +For more complex topics, the “explain like I’m 5” method works wonders—it simplifies challenging ideas into digestible explanations. + +To take this further, incorporate a Personal Knowledge Management (PKM) system into your routine. +Apps like Obsidian are perfect for this, with their flexible folder structures and Markdown formatting. +I optimize my AI outputs for Markdown so I can copy, paste, and organize them into clean, structured notes. +This ensures your materials are not only well-organized but also easy to access and build on later. +A solid PKM system is invaluable for managing knowledge and staying on top of your studies! 
\ No newline at end of file diff --git a/Docs/Design/Finetuning.md b/Docs/Design/Finetuning.md new file mode 100644 index 000000000..88ff85ec3 --- /dev/null +++ b/Docs/Design/Finetuning.md @@ -0,0 +1,12 @@ +# Finetuning + + + + +### Link Dump +https://colab.research.google.com/drive/1WVf8SUdZ8YllGyqL6fBSCcCI2AC1JGc4?usp=sharing&pli=1 +https://colab.research.google.com/drive/1hrE-MPvOBThQQA51f20m0qBUIgN_g7DG?usp=sharing +https://colab.research.google.com/drive/1hmOqYfamizWYe5xjX01z-IIROeaWRhA7?usp=sharing +https://colab.research.google.com/drive/1TT6NED5iFUGratZj4aHe13iOJkDTUUVT?usp=sharing +https://kaitchup.substack.com/p/the-recipe-for-extremely-accurate-quantization?triedRedirect=true +https://github.com/Kiln-AI/Kiln \ No newline at end of file diff --git a/Docs/Design/Inference_Engines.md b/Docs/Design/Inference_Engines.md new file mode 100644 index 000000000..dce6e81b5 --- /dev/null +++ b/Docs/Design/Inference_Engines.md @@ -0,0 +1,27 @@ +# Inference Engines + +## Introduction + + +### HuggingFace Transformers +https://huggingface.co/docs/transformers/main/en/conversations + +### Llama.cpp + + +### Llamafile + + +### TabbyAPI + + +### vLLM + + + +### Link Dump +https://github.com/intel/neural-compressor/tree/v3.2 +https://southbridge-research.notion.site/Entropixplained-11e5fec70db18022b083d7d7b0e93505 +https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms?utm_medium=email +https://magazine.sebastianraschka.com/p/lora-and-dora-from-scratch?utm_medium=email +https://arxiv.org/abs/2405.09673 \ No newline at end of file diff --git a/Docs/Design/Mindmaps_Diagrams.md b/Docs/Design/Mindmaps_Diagrams.md new file mode 100644 index 000000000..087b4bcdd --- /dev/null +++ b/Docs/Design/Mindmaps_Diagrams.md @@ -0,0 +1,5 @@ +# Mindmaps & Diagram Creation + + + +https://github.com/mermaid-js/mermaid \ No newline at end of file diff --git a/Docs/Design/Privacy.md b/Docs/Design/Privacy.md new file mode 100644 index 000000000..64814215d --- /dev/null +++ b/Docs/Design/Privacy.md @@ -0,0 +1,7 @@ +# Privacy-related + + +### Link Dump: +https://arxiv.org/abs/2412.03924 + + diff --git a/Docs/Design/Prompts.md b/Docs/Design/Prompts.md new file mode 100644 index 000000000..a64f6f6b3 --- /dev/null +++ b/Docs/Design/Prompts.md @@ -0,0 +1,87 @@ +# Prompts & Prompt Engineering + +### Link Dump: +https://github.com/PySpur-Dev/PySpur +https://github.com/itsPreto/tangent +https://arxiv.org/abs/2412.13171 +https://github.com/LouisShark/chatgpt_system_prompt +https://github.com/microsoft/PromptWizard + + +https://medium.com/@camauger/crafting-effective-chatgpt-prompts-for-tabletop-roleplaying-games-a-step-by-step-guide-part-1-b81a791d278d + + + + + + + + + + + + + + + +```https://gist.githubusercontent.com/dsartori/35de7f2ed879d5a5e50f6362dea2281b/raw/fb45b3ebbed46ebd99cd4a8d7083112ada596090/rag_prompt.txt +You are an expert assistant trained to retrieve and generate detailed information **only** from a curated dataset. Your primary goal is to answer natural-language queries accurately and concisely by extracting and synthesizing information explicitly available in the dataset. You are prohibited from making assumptions, inferences, or providing information that cannot be directly traced back to the dataset. The topics you specialize in are: + + +- policies and priorities +- organizational structure +- programs and operations +- key partnerships +- challenges +- history and legislation + + +### Guidelines for Responses: +1. 
**Source-Dependence**: + - Only provide answers based on explicit information in the dataset. + - Avoid making assumptions, synthesizing unrelated data, or inferring conclusions not directly supported by the dataset. + - If the requested information is not found, respond transparently with: *"This information is not available in the dataset."* + + +2. **Explicit Citations**: + - For every response, reference the specific chunk(s) or metadata field(s) that support your answer (e.g., "According to chunk 1-4, ..."). + - If multiple chunks are used, list all relevant sources to improve transparency. + + +3. **Clarification**: + - If a query is ambiguous or lacks sufficient context, ask clarifying questions before proceeding. + + +4. **Language Consistency**: + - Respond exclusively in the user’s language. Do not switch languages or interpret unless explicitly requested. + + +5. **Accuracy First**: + - Prioritize accuracy by strictly adhering to the dataset. Avoid providing speculative or generalized answers. + + +6. **General Before Specific**: + - Begin with a concise general overview of the relevant topic, based entirely on the dataset. + - Provide detailed insights, examples, or elaborations only upon follow-up or explicit request. + + +7. **Iterative Engagement**: + - Encourage the user to refine or expand their queries to enable more precise responses. + + +### Response Structure: +1. **General Overview**: Provide a high-level summary of the relevant information available in the dataset. +2. **Detailed Insights (If Requested)**: Offer specific details or examples directly sourced from the dataset, explicitly citing the source. +3. **Unavailable Information**: If the dataset lacks information for a query, respond with: *"This information is not available in the dataset."* +4. **Next Steps**: Suggest follow-up queries or related topics the user might explore. + + +### Key Instructions: +- **Do Not Hallucinate**: Never provide information that is not explicitly present in the dataset. If uncertain, state clearly that the information is unavailable. +- **Transparency**: Reference specific chunks, sections, or metadata fields for every detail provided. +- **Avoid Inference**: Refrain from combining or interpreting unrelated information unless explicitly connected within the dataset. +- **Focus on Relevance**: Ensure answers are concise, precise, and directly address the user’s query. + + +Adapt to the user's needs by maintaining strict adherence to the dataset while offering actionable and transparent insights. 
+``` \ No newline at end of file diff --git a/Docs/Design/RSS_Ranking.md b/Docs/Design/RSS_Ranking.md index 3fb8500ce..d73a9c6bb 100644 --- a/Docs/Design/RSS_Ranking.md +++ b/Docs/Design/RSS_Ranking.md @@ -3,16 +3,23 @@ ## Introduction + +RSS + https://feedparser.readthedocs.io/en/latest/ + https://github.com/kurtmckee/feedparser + + https://towardsdatascience.com/a-mixed-methods-approach-to-offline-evaluation-of-news-recommender-systems-7dc7e9f0b501 https://towardsdatascience.com/making-news-recommendations-explainable-with-large-language-models-74f119c7e036 - +https://blog.det.life/from-scrolls-to-similarity-search-building-a-movie-recommender-with-duckdb-vss-8122e4d2e486?gi=c094d02d0993 https://www.dogesec.com/blog/full_text_rss_atom_blog_feeds/ https://arxiv.org/abs/2411.19352 +https://arxiv.org/abs/2412.18082 - - - +https://arxiv.org/abs/2412.08604 +https://arxiv.org/abs/2411.18814 +https://blog.badsectorlabs.com/files/blogs.txt diff --git a/Docs/Design/Researcher.md b/Docs/Design/Researcher.md index 97bed007e..c4203c00f 100644 --- a/Docs/Design/Researcher.md +++ b/Docs/Design/Researcher.md @@ -1,13 +1,242 @@ # Automated Researcher ## Introduction +- This page is to document efforts towards creating a 'research' agent workflow for use within the project. The goal is to create a system that can automatically generate research reports, summaries, and other research-related tasks. + +### Researcher Goals +1. f +2. f +3. f +4. f +5. f +6. f +7. f +8. + + +### Ideas +Gated, checkpoints with 'retry, skip, continue' options +s +Follow gptresearchers method at first, planner LLM -> query LLM -> analyzer LLM -> summarizer LLM + + +### Researcher Workflow + + +### Researcher Components +1. **Query/Search Engine** + - f + - f +2. **Planner** + - f + - f +3. **Analyzer** + - f + - f +4. **(Optional: Summarizer)** + - f + - f +5. **Report Generator** + - f + - f +6. 
**Knowledge Base Management** + - f + - f + + +### Researcher Config Definitions +- `default_search_engine`: The default search engine to use for queries + +- Researcher config section +``` +[researcher] +# Researcher settings +default_search_engine = google +# Options are: google, bing, yandex, baidu, searx, kagi, serper, tavily +default_search_type = web +# Options are: web, local, both +default_search_language = en +# Options are: FIXME +default_search_report_language = en +# Options are: FIXME +default_search_sort = relevance +# Options are: relevance, date +default_search_safe_search = moderate +# Options are: off, moderate, strict +default_search_planner = openai-o1-full +# Options are: FIXME +default_search_planner_max_tokens = 8192 +default_search_analyzer = openai-o1-full +# Options are: FIXME +default_search_analyzer_max_tokens = 8192 +default_search_summarization = openai-o1-full +# Options are: FIXME +default_search_summarization_max_tokens = 8192 +search_max_results = 100 +search_report_format = markdown +# Options are: markdown, html, pdf +search_max_iterations = 5 +search_max_subtopics = 4 +search_custom_user_agent = "CUSTOM_USER_AGENT_HERE" +search_blacklist_URLs = "URL1,URL2,URL3" +``` + + +Perplexica + https://github.com/ItzCrazyKns/Perplexica/blob/master/src/search/metaSearchAgent.ts + https://github.com/ItzCrazyKns/Perplexica/blob/master/src/chains/suggestionGeneratorAgent.ts + https://github.com/ItzCrazyKns/Perplexica/blob/master/src/chains/imageSearchAgent.ts + +Farfalle + https://github.com/rashadphz/farfalle/blob/main/src/backend/agent_search.py -This page is to document efforts towards creating a 'research' agent workflow for use within the project. The goal is to create a system that can automatically generate research reports, summaries, and other research-related tasks.
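A minimal sketch of how the `[researcher]` section above might be read at startup, assuming Python's configparser; the file name, fallback values, and key handling are illustrative rather than the project's actual loader:

```python
from configparser import ConfigParser

config = ConfigParser()
config.read("config.txt")
researcher = config["researcher"] if config.has_section("researcher") else {}

settings = {
    "search_engine": researcher.get("default_search_engine", "google"),
    "planner_model": researcher.get("default_search_planner", "openai-o1-full"),
    "planner_max_tokens": int(researcher.get("default_search_planner_max_tokens", 8192)),
    "max_results": int(researcher.get("search_max_results", 100)),
    "max_iterations": int(researcher.get("search_max_iterations", 5)),
    # Strip the surrounding quotes shown in the example config, then split.
    "blacklist": [u for u in researcher.get("search_blacklist_URLs", "").strip('"').split(",") if u],
}
print(settings)
```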
### Link Dump: +Articles + https://docs.gptr.dev/blog/gptr-hybrid + https://docs.gptr.dev/docs/gpt-researcher/context/local-docs + https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research# + https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package + +Standford STORM + https://arxiv.org/abs/2402.14207# + https://storm.genie.stanford.edu/ + +Google Learn About + https://learning.google.com/experiments/learn-about + +Google Pinpoint + https://journaliststudio.google.com/pinpoint/about/ + +Gemini Deepresearcher + https://blog.google/products/gemini/google-gemini-deep-research/ + https://github.com/assafelovic/gpt-researcher https://arxiv.org/abs/2411.15114 -https://journaliststudio.google.com/pinpoint/about/ + +https://github.com/binary-husky/gpt_academic/blob/master/docs/README.English.md +https://arxiv.org/abs/2409.13741 +https://github.com/assafelovic/gpt-researcher/tree/master/gpt_researcher + +https://github.com/neuml/annotateai +https://docs.gptr.dev/docs/gpt-researcher/multi_agents/langgraph +https://pub.towardsai.net/learn-anything-with-ai-and-the-feynman-technique-00a33f6a02bc +https://help.openalex.org/hc/en-us/articles/24396686889751-About-us +https://www.ginkgonotes.com/ +https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents +https://www.reddit.com/r/Anki/comments/17u01ge/spaced_repetition_algorithm_a_threeday_journey/ +https://github.com/open-spaced-repetition/fsrs4anki/wiki/Spaced-Repetition-Algorithm:-A-Three%E2%80%90Day-Journey-from-Novice-to-Expert#day-3-the-latest-progress +https://www.scrapingdog.com/blog/scrape-google-news/ +https://github.com/mistralai/cookbook/blob/main/third_party/LlamaIndex/llamaindex_arxiv_agentic_rag.ipynb +https://github.com/ai-christianson/RA.Aid +https://github.com/cbuccella/perplexity_research_prompt/blob/main/general_research_prompt.md +https://github.com/0xeb/TheBigPromptLibrary/blob/main/SystemPrompts/Perplexity.ai/20241024-Perplexity-Desktop-App.md +https://github.com/rashadphz/farfalle +https://github.com/cbuccella/perplexity_research_prompt/blob/main/general_research_prompt.md +https://www.emergentmind.com/ +https://github.com/neuml/paperai +https://github.com/neuml/paperetl +https://github.com/ai-christianson/RA.Aid +https://github.com/Future-House/paper-qa +https://openreview.net/ +https://www.researchrabbit.ai/ +https://github.com/faraz18001/Sales-Llama +https://github.com/memgraph/memgraph +https://github.com/rashadphz/farfalle/tree/main/src/backend + +https://github.com/rashadphz/farfalle/blob/main/src/backend/agent_search.py +https://github.com/rashadphz/farfalle/blob/main/src/backend/prompts.py +https://github.com/stanford-oval/storm/ +https://github.com/stanford-oval/storm/blob/main/examples/storm_examples/run_storm_wiki_claude.py +https://learning.google.com/experiments/learn-about + +AI Web Researcher Ollama + https://github.com/TheBlewish/Automated-AI-Web-Researcher-Ollama/blob/main/Self_Improving_Search.py + https://github.com/TheBlewish/Automated-AI-Web-Researcher-Ollama + + + + + +### Researcher Prompts +https://github.com/cbuccella/perplexity_research_prompt +https://github.com/rashadphz/farfalle/blob/main/src/backend/prompts.py + +https://github.com/ItzCrazyKns/Perplexica/tree/master/src/prompts +https://github.com/SakanaAI/AI-Scientist + +``` +SEARCH_QUERY_PROMPT = """\ +Generate a concise list 
of search queries to gather information for executing the given step. + +You will be provided with: +1. A specific step to execute +2. The user's original query +3. Context from previous steps (if available) + +Use this information to create targeted search queries that will help complete the current step effectively. Aim for the minimum number of queries necessary while ensuring they cover all aspects of the step. + +IMPORTANT: Always incorporate relevant information from previous steps into your queries. This ensures continuity and builds upon already gathered information. + +Input: +--- +User's original query: {user_query} +--- +Context from previous steps: +{prev_steps_context} + +Your task: +1. Analyze the current step and its requirements +2. Consider the user's original query and any relevant previous context +3. Consider the user's original query +4. Generate a list of specific, focused search queries that: + - Incorporate relevant information from previous steps + - Address the requirements of the current step + - Build upon the information already gathered +--- +Current step to execute: {current_step} +--- + +Your search queries based: +""" +``` + +I use NotebookLM daily and find it incredibly helpful. However, I've noticed a potential improvement for the audio creation feature. Currently, when generating audio from a source, it primarily focuses on the provided text. I propose enhancing this by adding a "deep research" component that runs in the background during audio generation. +Imagine this: you provide NotebookLM with a news article about a new AI tool. When you click "create audio," instead of just reading the article, NotebookLM would: + Analyze the Source: Understand the core topic, key terms, and context of the provided source. + Conduct Background Research: Leverage Google's powerful search and knowledge graph to gather additional information related to the topic. This could include: + Official documentation or websites for tools. + Related news articles, blog posts, and research papers. + Expert opinions and analyses. + Relevant historical context. + Integrate Findings: Seamlessly weave the researched information into the audio output, creating a more comprehensive and insightful experience. This could be done by: + Adding explanatory segments or summaries. + Providing context and background information. + Highlighting different perspectives or opinions. + Offering definitions of key terms. +Example: +If the source is an article about "LaMDA," NotebookLM could research: + Google AI's official information on LaMDA. + Recent advancements in large language models. + Ethical considerations surrounding AI language models. + Comparisons to other similar models. +This would result in an audio output that not only summarizes the original article but also provides valuable context and deeper understanding. +Benefits: + More Comprehensive Content: Audio outputs become more informative and valuable for users. + Saves User Time: Users don't have to conduct their own research to get the full picture. + Enhanced Learning Experience: Provides a richer and more engaging way to consume information. + Positions NotebookLM as an Expert Resource: By providing in-depth information, NotebookLM becomes a go-to tool for learning about various topics. +Suggested Implementation Details: + Leverage Google's Existing Tools: Utilize Google Search, Knowledge Graph, and potentially the "deep research" module already present within Google's ecosystem. 
This would ensure seamless integration and efficient use of existing resources. + Clear User Controls: Provide options for users to customize the depth of research (e.g., "basic," "moderate," "in-depth"). This gives users control over the process and prevents information overload. + Citation and Source Linking: Include links to the researched sources within the NotebookLM document associated with the audio, providing transparency and allowing users to verify information. + Integration with Google Lens: If an image is part of the source, use Google Lens to extract text and context, further enhancing the research capabilities. +Additional Features: + Option to Exclude Research: Allow users to disable background research if they only want a direct reading of the source. + Customizable Research Focus: Allow users to specify keywords or areas of focus for the background research, allowing for more targeted results. + Multilingual Research: Expand research capabilities to multiple languages, making the feature more globally accessible. +By implementing this feature, NotebookLM can become an even more powerful tool for learning and understanding complex topics, providing users with comprehensive and insightful audio experiences. \ No newline at end of file diff --git a/Docs/Design/Search.md b/Docs/Design/Search.md new file mode 100644 index 000000000..744fd95d3 --- /dev/null +++ b/Docs/Design/Search.md @@ -0,0 +1,9 @@ +# Search + + +### Link Dump +https://pub.towardsai.net/hnsw-small-world-yes-but-how-in-the-world-is-it-navigable-77701ed37e20 +https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1 +https://github.com/quickwit-oss/tantivy + + diff --git a/Docs/Design/Storytelling_and_Creation.md b/Docs/Design/Storytelling_and_Creation.md new file mode 100644 index 000000000..644429cf8 --- /dev/null +++ b/Docs/Design/Storytelling_and_Creation.md @@ -0,0 +1,15 @@ +# Storytelling & Creation + + + +### Link Dump: +https://github.com/mhgolkar/Arrow +https://github.com/neph1/LlamaTale +https://jianzongwu.github.io/projects/diffsensei/ +https://github.com/jianzongwu/DiffSensei +https://github.com/joonspk-research/generative_agents + + +### Storytelling + +### Creating Stories diff --git a/Docs/Design/Structured_Outputs.md b/Docs/Design/Structured_Outputs.md index 9607ceaca..72d803061 100644 --- a/Docs/Design/Structured_Outputs.md +++ b/Docs/Design/Structured_Outputs.md @@ -1,26 +1,69 @@ # Structured Outputs + +https://towardsdatascience.com/diving-deeper-with-structured-outputs-b4a5d280c208 + + ## Introduction This page serves as documentation regarding the structured outputs within tldw and provides context/justification for the decisions made within the module. ## Structured Outputs +- Structured outputs are useful for generating structured data from unstructured text. 
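One common way to get this is to have the model emit JSON and validate it against a schema. A minimal sketch, assuming Pydantic v2 (listed under Tools below) and an invented flash-card schema matching the Anki use case:

```python
import json
from typing import List
from pydantic import BaseModel, ValidationError

class Flashcard(BaseModel):
    front: str
    back: str
    tags: List[str] = []

class FlashcardDeck(BaseModel):
    deck_name: str
    cards: List[Flashcard]

def parse_llm_output(raw: str) -> FlashcardDeck:
    """Validate an LLM's JSON reply against the schema; raise if it doesn't conform."""
    try:
        return FlashcardDeck.model_validate_json(raw)
    except ValidationError as e:
        # A real pipeline would retry here with a repair prompt.
        raise ValueError(f"Model returned malformed structured output: {e}") from e

# Example with a well-formed reply:
reply = json.dumps({"deck_name": "Demo", "cards": [{"front": "ETL?", "back": "Extract, Transform, Load"}]})
print(parse_llm_output(reply))
```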
-### Link Dump: -https://github.com/yobix-ai/extractous -https://llamahub.ai/l/readers/llama-index-readers-microsoft-sharepoint -https://blog.dottxt.co/say-what-you-mean.html -https://github.com/dottxt-ai/demos/tree/main/lore-generator -https://github.com/dottxt-ai/cursed/tree/main/scp -https://python.useinstructor.com/ -https://github.com/mlc-ai/xgrammar -https://github.com/guidance-ai/guidance -https://blog.dottxt.co/coalescence.html -https://arxiv.org/html/2408.02442v1 -https://www.boundaryml.com/blog/sota-function-calling -https://arxiv.org/abs/2408.02442 -https://towardsdatascience.com/enforcing-json-outputs-in-commercial-llms-3db590b9b3c8 -https://python.plainenglish.io/generating-perfectly-structured-json-using-llms-all-the-time-13b7eb504240 -https://docs.pydantic.dev/latest/ -https://github.com/outlines-dev/outlines[ -https://github.com/Dan-wanna-M/formatron/tree/master +### Use Cases +1. File Creation + - .ical file (calendar file creation) + - .json file (structured data) + - .csv file (Anki Flash cards + structured data) + - .xml file + - .yaml file + - .toml file + - +2. Data Extraction + - https://github.com/yobix-ai/extractous + - Can use structured outputs for data extraction from unstructured text. Though why isn't this talked about/even mentioned in any of the papers about RAG or writeups on RAG implementations? hmmmm...... +3. Data Generation + - Can use structured outputs for data generation from unstructured text. + - Could come in handy for RPGs/Text-based games reliant on world building/lore generation. + + +### Implementation +- Integration for file creation +- Look at using for ETL pipeline +- Support/integration for content creation pipelines for RPG campaigns, etc. + + +Process + https://python.plainenglish.io/generating-perfectly-structured-json-using-llms-all-the-time-13b7eb504240 +Tools + https://python.useinstructor.com/ + https://github.com/mlc-ai/xgrammar + https://github.com/guidance-ai/guidance + https://github.com/boundaryml/baml + https://docs.pydantic.dev/latest/ + https://github.com/outlines-dev/outlines + https://github.com/Dan-wanna-M/formatron/tree/master + https://github.com/whyhow-ai/knowledge-table + https://github.com/guardrails-ai/guardrails + +Examples + https://github.com/dottxt-ai/demos/tree/main/lore-generator + https://github.com/dottxt-ai/demos/tree/main/logs + https://github.com/dottxt-ai/demos/tree/main/earnings-reports + https://github.com/dottxt-ai/demos/tree/main/its-a-smol-world + https://github.com/dottxt-ai/cursed/tree/main/scp + + +Reliability/Quality of: + https://dylancastillo.co/posts/say-what-you-mean-sometimes.html + https://blog.dottxt.co/say-what-you-mean.html + +Papers + https://arxiv.org/html/2408.02442v1 - Structured Outputs harms reasoning capabilities + + +Gemini + https://ai.google.dev/gemini-api/docs/structured-output?lang=python + +### Link Dump: diff --git a/Docs/Design/Summarization.md b/Docs/Design/Summarization.md new file mode 100644 index 000000000..ed32ef8b6 --- /dev/null +++ b/Docs/Design/Summarization.md @@ -0,0 +1,22 @@ +# Summarization + +## Introduction + +This page is to document the 'summarization' workflow for use within the project. The goal is to create a system that can automatically generate summaries of text, documents, and other content. 
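A rough map-reduce sketch of the usual chunk-then-combine approach (described in some of the links below): summarize each chunk, then summarize the joined partial summaries. The `summarize` callable is a stand-in for whatever LLM call is used, not the project's actual API.

```python
import textwrap
from typing import Callable, List

def chunk_text(text: str, max_chars: int = 4000) -> List[str]:
    """Naive fixed-size chunking; real use would split on sentences or sections."""
    return textwrap.wrap(text, max_chars, break_long_words=False, replace_whitespace=False)

def iterative_summarize(text: str, summarize: Callable[[str], str], max_chars: int = 4000) -> str:
    """Summarize chunks, then recurse on the combined summaries until they fit."""
    partials = [summarize(f"Summarize the following text:\n\n{c}") for c in chunk_text(text, max_chars)]
    combined = "\n\n".join(partials)
    if len(combined) <= max_chars:
        return summarize(f"Combine these partial summaries into one summary:\n\n{combined}")
    # Assumes summaries are shorter than their inputs, so the recursion terminates.
    return iterative_summarize(combined, summarize, max_chars)

if __name__ == "__main__":
    fake_llm = lambda prompt: prompt[-80:]   # placeholder "summary" for a dry run
    print(iterative_summarize("some long document " * 500, fake_llm))
```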
+ + + +### Summarization Goals + + +### Relevant Research + + + +### Link Dump +https://neptune.ai/blog/llm-evaluation-text-summarization +https://phoenix.arize.com/llm-summarization-getting-to-production/ +https://blog.metrostar.com/iteratively-summarize-long-documents-llm +https://arxiv.org/html/2412.15487v1 +https://arxiv.org/pdf/2204.01849 + diff --git a/Docs/Design/TTA.md b/Docs/Design/TTA.md new file mode 100644 index 000000000..157bd000d --- /dev/null +++ b/Docs/Design/TTA.md @@ -0,0 +1,9 @@ +# Text-to-Audio (TTA) + +## Introduction + + +### Link Dump: +https://github.com/declare-lab/Tangoflux +https://tangoflux.github.io/ + diff --git a/Docs/Design/TTS_STT.md b/Docs/Design/TTS_STT.md index 19b3dbdab..7ac47c0cc 100644 --- a/Docs/Design/TTS_STT.md +++ b/Docs/Design/TTS_STT.md @@ -1,19 +1,510 @@ # Text-To-Speech / Speech-To-Text Documentation ## Overview +Use of functions for individual services. +Function for each service, streaming & non-streaming. +Non-streaming will return a file, streaming will return a stream. +Use of temporary files for storage. +Use of pydub for audio manipulation. +Use of pydub for audio merging. +Flow: +1. Clean/format input text +2. Split text into segments +3. Generate audio for each segment using designated provider function +4. Merge audio segments into single output file +5. Clean up temporary files + +### Services +- Google Cloud Text-to-Speech + - https://cloud.google.com/text-to-speech/docs/ssml + + +### Benchmarks +https://huggingface.co/blog/big-bench-audio-release + https://huggingface.co/datasets/ArtificialAnalysis/big_bench_audio +https://artificialanalysis.ai/models/speech-to-speech + + + + + +### Link Dump: +https://github.com/albirrkarim/react-speech-highlight-demo +https://funaudiollm.github.io/cosyvoice2/ +https://funaudiollm.github.io/cosyvoice2/ +https://github.com/InternLM/InternLM-XComposer/tree/main/InternLM-XComposer-2.5-OmniLive +https://github.com/Azure-Samples/aisearch-openai-rag-audio +https://www.reddit.com/r/LocalLLaMA/comments/1f0awd6/best_local_open_source_texttospeech_and/ +https://github.com/FanaHOVA/smol-podcaster +https://docs.inferless.com/cookbook/serverless-customer-service-bot +https://wave-pulse.io/ +https://huggingface.co/spaces/saq1b/podcastgen/blob/main/app.py +https://huggingface.co/spaces/mozilla-ai/document-to-podcast/blob/main/app.py +https://huggingface.co/spaces/Nymbo/Voice-Clone-Multilingual/tree/main +https://github.com/aedocw/epub2tts +https://github.com/microsoft/SpeechT5 +https://www.lightnote.co/?utm_source=www.hivefive.community&utm_medium=newsletter&utm_campaign=hive-five-202-a-bias-to-action +https://github.com/smellslikeml/dolla_llama +https://github.com/dnhkng/GlaDOS +https://arxiv.org/abs/2501.01384 +https://sharechatx.github.io/ +https://github.com/vipchengrui/traditional-speech-enhancement +https://github.com/ictnlp/LLaMA-Omni +https://github.com/lamm-mit/PDF2Audio +https://github.com/Purfview/whisper-standalone-win +https://github.com/livekit/agents +https://huggingface.co/papers/2410.02678 +https://github.com/Picovoice/speech-to-text-benchmark +https://huggingface.co/spaces/bencser/episodegen + + + +STT + https://github.com/KoljaB/RealtimeSTT + https://github.com/southbridgeai/offmute + MoonShine + https://github.com/usefulsensors/moonshine + https://github.com/huggingface/transformers.js-examples/tree/main/moonshine-web + 
https://huggingface.co/onnx-community/moonshine-base-ONNX + +TTS + https://github.com/KoljaB/RealtimeTTS + https://si.inc/hertz-dev/ + +101 + https://www.inferless.com/learn/comparing-different-text-to-speech---tts--models-for-different-use-cases + https://clideo.com/resources/what-is-tts + https://pub.towardsai.net/the-ultimate-guide-to-audio-processing-principles-techniques-and-applications-7724efea00e8 + RVC 101 + https://gudgud96.github.io/2024/09/26/annotated-rvc/ + +Datasets(?) + https://voice-models.com/ + +Auralis + https://github.com/astramind-ai/Auralis + https://www.astramind.ai/post/auralis + +Amphion + https://github.com/open-mmlab/Amphion + https://huggingface.co/amphion/Vevo + https://github.com/open-mmlab/Amphion/blob/main/models/vc/vevo/README.md + https://openreview.net/pdf?id=anQDiQZhDP + https://versavoice.github.io/ + +Bark +https://github.com/suno-ai/bark + +ChatTTS +https://huggingface.co/2Noise/ChatTTS +https://chattts.com/#Demo + +Coqui TTS + https://github.com/idiap/coqui-ai-TTS + https://huggingface.co/spaces/coqui/xtts/blob/main/app.py + +Cartesia + https://docs.cartesia.ai/get-started/make-an-api-request + +F5 TTS + https://github.com/SWivid/F5-TTS + +lina TTS +https://github.com/theodorblackbird/lina-speech/blob/main/InferenceLina.ipynb +https://github.com/theodorblackbird/lina-speech + +Podcastfy + https://github.com/souzatharsis/podcastfy/blob/main/podcastfy/tts/base.py + https://github.com/souzatharsis/podcastfy/blob/main/podcastfy/text_to_speech.py + https://github.com/souzatharsis/podcastfy/blob/main/podcastfy/content_generator.py + +GLM-4-Voice + https://github.com/THUDM/GLM-4-Voice/blob/main/README_en.md + https://github.com/THUDM/GLM-4-Voice/tree/main + +MoonShine + https://huggingface.co/onnx-community/moonshine-base-ONNX + https://huggingface.co/spaces/webml-community/moonshine-web + https://github.com/huggingface/transformers.js-examples/tree/main/moonshine-web + +Gemini + https://ai.google.dev/gemini-api/docs#rest + https://ai.google.dev/gemini-api/docs/models/gemini-v2 + https://github.com/google-gemini/cookbook/blob/main/quickstarts/Audio.ipynb + +ElevenLabs + https://github.com/elevenlabs/elevenlabs-examples/blob/main/examples/text-to-speech/python/text_to_speech_file.py + https://elevenlabs.io/docs/api-reference/text-to-speech + https://elevenlabs.io/docs/developer-guides/how-to-use-tts-with-streaming Models - https://huggingface.co/NexaAIDev/Qwen2-Audio-7B-GGUF + https://huggingface.co/NexaAIDev/Qwen2-Audio-7B-GGUF +Merging Audio + https://github.com/jiaaro/pydub -GPT-SoviTTS -https://levelup.gitconnected.com/great-api-design-comprehensive-guide-from-basics-to-best-practices-9b4e0b613a44?source=home---------56-1--------------------0fc48da7_5ce6_48ca_92d2_260680a20318-------3 -https://rentry.org/GPT-SoVITS-guide -https://github.com/RVC-Boss/GPT-SoVITS + + +MaskGCT + https://maskgct.github.io/#emotion-samples + https://github.com/open-mmlab/Amphion/blob/main/models/tts/maskgct/README.md + https://github.com/open-mmlab/Amphion/blob/main/models/tts/maskgct/maskgct_demo.ipynb + https://github.com/open-mmlab/Amphion/blob/main/models/tts/maskgct/maskgct_inference.py + https://huggingface.co/amphion/MaskGCT + +Mimic + https://github.com/MycroftAI/mimic3 + + +Parler + https://github.com/huggingface/parler-tts + +Piper (linux only) + https://github.com/rhasspy/piper + 
https://github.com/rhasspy/piper/issues/644 + https://github.com/rhasspy/piper/discussions/326#discussioncomment-7935208 + https://noerguerra.com/how-to-read-text-aloud-with-piper-and-python/ + https://ssamjh.nz/create-custom-piper-tts-voice/ + https://www.trycatchdebug.net/news/1377664/realtime-tts-with-pipertts-and-openai + https://huggingface.co/rhasspy/piper-voices/tree/main + https://huggingface.co/datasets/rhasspy/piper-checkpoints/tree/main + +Sherpa ONNX + https://github.com/k2-fsa/sherpa-onnx + +YourTTS + https://github.com/Edresson/YourTTS TTS Pipeline https://www.astramind.ai/post/auralis -https://github.com/cpumaxx/sovits-ff-plugin \ No newline at end of file +https://github.com/cpumaxx/sovits-ff-plugin + + + +Train using: https://github.com/Mangio621/Mangio-RVC-Fork/releases, +import the .pth into https://huggingface.co/wok000/vcclient000/tree/main to convert your voice in near real time with about a .25s delay + +https://www.hackster.io/lhl/voicechat2-local-ai-voice-chat-4c48f2 + +https://github.com/abus-aikorea/voice-pro + +https://github.com/myshell-ai/MeloTTS +https://github.com/idiap/coqui-ai-TTS +https://docs.inferless.com/cookbook/serverless-customer-service-bot + + +https://huggingface.co/spaces/lamm-mit/PDF2Audio + +https://huggingface.co/spaces/bencser/episodegen +https://github.com/myshell-ai/MeloTTS +https://github.com/idiap/coqui-ai-TTS +https://docs.inferless.com/cookbook/serverless-customer-service-bot +https://github.com/Picovoice/speech-to-text-benchmark +Train using: https://github.com/Mangio621/Mangio-RVC-Fork/releases, +import the .pth into https://huggingface.co/wok000/vcclient000/tree/main to convert your voice in near real time with about a .25s delay + +https://www.hackster.io/lhl/voicechat2-local-ai-voice-chat-4c48f2 +https://huggingface.co/papers/2410.02678 + +https://github.com/livekit/agents +https://github.com/pipecat-ai/pipecat/tree/a367a038f1a3967292b5de5b43b8600a82a73fb6?tab=readme-ov-file + +https://github.com/lamm-mit/PDF2Audio +https://github.com/Purfview/whisper-standalone-win +https://github.com/ictnlp/LLaMA-Omni +https://levelup.gitconnected.com/build-a-real-time-ai-voice-and-video-chat-app-with-function-calling-by-gemini-2-0-49599a48fbe9?gi=c894f6c092be +https://github.com/agituts/gemini-2-podcast +https://github.com/SWivid/F5-TTS + + +https://github.com/matatonic/openedai-speech + +https://github.com/RVC-Boss/GPT-SoVITS +https://www.bilibili.com/video/BV11iiNegEGP/ +https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) +https://rentry.org/GPT-SoVITS-guide +https://rentry.org/GPT-SoVITS-guide +It's just the 3 buttons (speech-to-text, ssl, semantics) and then training. + +The default training settings on the gradio UI are fine but I save epoch 12-16-24 on SoVITS for testing as that's the sweet spot range. 
+ +Next thing that matters a lot is the ref audio you pick, and you can also drop your entire dataset into the "multiple references to average tone" box, which can improve the voice + +Only thing I changed was remove the space at the beginning of each lines in your list file + +(Look at batch size/ list file) + +And make sure you get the latest version https://github.com/RVC-Boss/GPT-SoVITS/releases + +https://github.com/souzatharsis/podcastfy + +https://github.com/THUDM/GLM-4-Voice/tree/main + +https://huggingface.co/cydxg/glm-4-voice-9b-int4/blob/main/README_en.md + +https://github.com/meta-llama/llama-recipes/tree/main/recipes%2Fquickstart%2FNotebookLlama + + +https://sakshi113.github.io/mmau_homepage/ + +https://github.com/fishaudio/fish-speech/tree/main +https://github.com/fishaudio/fish-speech/blob/main/Start_Agent.md +https://huggingface.co/fishaudio/fish-agent-v0.1-3b/tree/main + +https://github.com/pixelpump/Ai-Interview-Assistant-Python +https://github.com/coqui-ai/TTS +https://github.com/Standard-Intelligence/hertz-dev +https://github.com/2noise/ChatTTS + +https://github.com/edwko/OuteTTS +https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF +https://huggingface.co/NexaAIDev/Qwen2-Audio-7B-GGUF + +https://www.twilio.com/en-us/blog/twilio-openai-realtime-api-launch-integration +https://github.com/huggingface/speech-to-speech +https://github.com/harvestingmoon/S2S +https://github.com/collabora/WhisperLive +https://github.com/JarodMica/audiobook_maker +https://github.com/myshell-ai/OpenVoice +https://github.com/JarodMica/GPT-SoVITS-Package +https://github.com/shagunmistry/NotebookLM_Alternative/tree/main/ai_helper +https://docs.cartesia.ai/get-started/make-an-api-request +https://github.com/JarodMica/open-neruosama +https://github.com/flatmax/speech-to-text +https://arxiv.org/abs/2412.18566 +https://github.com/Rolandjg/skool4free + + +SoundStorm + https://deepmind.google/discover/blog/pushing-the-frontiers-of-audio-generation/ + https://github.com/lucidrains/soundstorm-pytorch + + +Google +https://github.com/google-gemini/cookbook/tree/main/gemini-2 +https://discuss.ai.google.dev/t/how-does-one-get-access-to-the-api-for-tts-features-of-gemini-2-0/53925/15 +https://illuminate.google.com/home?pli=1 +``` +import asyncio +import base64 +import json +import numpy as np +import os +import websockets +import wave +import contextlib +import pygame +from IPython.display import display, Markdown + +# ANSI color codes +GREEN = "\033[92m" +YELLOW = "\033[93m" +RED = "\033[91m" +BLUE = "\033[94m" +RESET = "\033[0m" + +voices = {"Puck", "Charon", "Kore", "Fenrir", "Aoede"} + +# --- Configuration --- +MODEL = 'models/gemini-2.0-flash-exp' +GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") +if not GOOGLE_API_KEY: + raise EnvironmentError("GOOGLE_API_KEY environment variable is not set.") +HOST = 'generativelanguage.googleapis.com' +URI = f'wss://{HOST}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={GOOGLE_API_KEY}' + +# Audio parameters +WAVE_CHANNELS = 1 # Mono audio +WAVE_RATE = 24000 +WAVE_SAMPLE_WIDTH = 2 + + +@contextlib.contextmanager +def wave_file(filename, channels=WAVE_CHANNELS, rate=WAVE_RATE, sample_width=WAVE_SAMPLE_WIDTH): + """Context manager for creating and managing wave files.""" + try: + with wave.open(filename, "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(sample_width) + 
wf.setframerate(rate) + yield wf + except wave.Error as e: + print(f"{RED}Error opening wave file '{filename}': {e}{RESET}") + raise + + +async def audio_playback_task(file_name, stop_event): + """Plays audio using pygame until stopped.""" + print(f"{BLUE}Starting playback: {file_name}{RESET}") + try: + pygame.mixer.music.load(file_name) + pygame.mixer.music.play() + while pygame.mixer.music.get_busy() and not stop_event.is_set(): + await asyncio.sleep(0.1) + except pygame.error as e: + print(f"{RED}Pygame error during playback: {e}{RESET}") + except Exception as e: + print(f"{RED}Unexpected error during playback: {e}{RESET}") + finally: + print(f"{BLUE}Playback complete: {file_name}{RESET}") + + +async def generate_audio(ws, text_input: str, voice_name="Kore") -> None: + """ + Sends text input to the Gemini API, receives an audio response, saves it to a file, and plays it back. + Relies on the server to maintain the session history. + """ + pygame.mixer.init() # Initialize pygame mixer + + msg = { + "client_content": { + "turns": [{"role": "user", "parts": [{"text": text_input}]}], + "turn_complete": True, + } + } + await ws.send(json.dumps(msg)) + + responses = [] + async for raw_response in ws: + response = json.loads(raw_response.decode()) + server_content = response.get("serverContent") + if server_content is None: + break + + model_turn = server_content.get("modelTurn") + if model_turn: + parts = model_turn.get("parts") + if parts: + for part in parts: + if "inlineData" in part and "data" in part["inlineData"]: + pcm_data = base64.b64decode(part["inlineData"]["data"]) + responses.append(np.frombuffer(pcm_data, dtype=np.int16)) + + turn_complete = server_content.get("turnComplete") + if turn_complete: + break + + if responses: + display(Markdown(f"{YELLOW}**Response >**{RESET}")) + audio_array = np.concatenate(responses) + file_name = 'output.wav' + with wave_file(file_name) as wf: + wf.writeframes(audio_array.tobytes()) + stop_event = asyncio.Event() + try: + await audio_playback_task(file_name, stop_event) + except Exception as e: + print(f"{RED}Error during audio playback: {e}{RESET}") + else: + print(f"{YELLOW}No audio returned{RESET}") + pygame.mixer.quit() # clean up pygame mixer + + +async def main(): + print(f"{GREEN}Available voices: {', '.join(voices)}{RESET}") + default_voice = "Kore" + print(f"{GREEN}Default voice is set to: {default_voice}, you can change it in the code{RESET}") + + config = { + "response_modalities": ["AUDIO"], + "speech_config": { + "voice_config": { + "prebuilt_voice_config": { + "voice_name": default_voice # Set voice + } + } + } + } + + async with websockets.connect(URI) as ws: + + async def setup(ws) -> None: + await ws.send( + json.dumps( + { + "setup": { + "model": MODEL, + "generation_config": config, + } + } + ) + ) + + raw_response = await ws.recv(decode=False) + setup_response = json.loads(raw_response.decode("ascii")) + print(f"{GREEN}Connected: {setup_response}{RESET}") + + await setup(ws) + while True: + text_prompt = input(f"{YELLOW}Enter your text (or type 'exit' to quit): {RESET}") + if text_prompt.lower() == "exit": + break + + try: + await generate_audio(ws, text_prompt, default_voice) + except Exception as e: + print(f"{RED}An error occurred: {e}{RESET}") + + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +### GPT-SoVITS + +- [GPT-SoVITS](f) +- [GPT-SoVITS-guide rentry.org](https://rentry.org/GPT-SoVITS-guide) +- Setup Guide: https://ai-hub-docs.vercel.app/tts/gpt-sovits/ + + +GPT-SoviTTS + 
https://levelup.gitconnected.com/great-api-design-comprehensive-guide-from-basics-to-best-practices-9b4e0b613a44?source=home---------56-1--------------------0fc48da7_5ce6_48ca_92d2_260680a20318-------3 + https://rentry.org/GPT-SoVITS-guide + https://github.com/RVC-Boss/GPT-SoVITS + https://github.com/cpumaxx/sovits-ff-plugin + https://github.com/HanxSmile/Simplify-GPT-SoVITS + https://github.com/lrxwisdom001/GPT-SoVITS-Novels/tree/main/voice_synthesis + openneurosama - https://github.com/JarodMica/open-neruosama/blob/master/main.py + https://huggingface.co/cpumaxx/SoVITS-anime-mini-tts + +https://tts.x86.st/ +Finetuning is very quick (about 5 minutes). Captioning of audio was automated with faster-whisper (it is required that the audio is captioned). +With the default batch size of 12, training takes 9.5~ GB. + +Inference + https://github.com/RVC-Boss/GPT-SoVITS/blob/main/GPT_SoVITS/inference_cli.py + No WebUI Inference on Colab: https://colab.research.google.com/drive/1gC1lRxuOh4qW8Yz5TA10BEUPR28nJ3VR + Training on Colab: https://colab.research.google.com/drive/1NQGKXYxJcJyTPnHsSyusTdD0l4IdMS37#scrollTo=nhyKqVwcPnvz + No WebUI Training on Colab: https://colab.research.google.com/drive/1LmeM8yUyT9MTYF8OXc-NiBonvdh6hII6 + +Datasets + https://ai-hub-docs.vercel.app/rvc/resources/datasets/ + https://mvsep.com/en + +API + https://github.com/cpumaxx/sovits-ff-plugin + +Comfyui integration + https://github.com/heshengtao/comfyui_LLM_party + + + +- **101** + - F +- **Setup** + - F +- **Training** + - F +- **Inference** + - F +- **Fine-Tuning** + - F + +### Dataset Creation/Curation +https://voiceguide.arimil.com/ + + diff --git a/Docs/Design/Text2SQL.md b/Docs/Design/Text2SQL.md index 2919ac8f7..029ec72f8 100644 --- a/Docs/Design/Text2SQL.md +++ b/Docs/Design/Text2SQL.md @@ -17,3 +17,7 @@ https://ai.gopubby.com/advanced-rag-retrieval-strategy-embedded-tables-fdb3e4400 https://medium.com/intel-tech/tabular-data-rag-llms-improve-results-through-data-table-prompting-bcb42678914b https://github.com/TAG-Research/TAG-Bench https://arxiv.org/pdf/2407.14482 +https://spider2-sql.github.io/ +https://departmentofproduct.substack.com/p/how-to-write-sql-queries-using-ai?utm_medium=email&triedRedirect=true + + diff --git a/Docs/Design/UX.md b/Docs/Design/UX.md index 573e0fd0f..19b6e1130 100644 --- a/Docs/Design/UX.md +++ b/Docs/Design/UX.md @@ -48,4 +48,53 @@ https://copycoder.ai/ https://docs.replit.com/replitai/agent https://bolt.new/ https://github.com/rmusser01/agentic_rag/tree/main +https://github.com/astramind-ai/Pulsar +https://github.com/woshixiaobai2019/mirau-chat-ui +https://uxdesign.cc/the-importance-of-hover-states-c9312d7fd516 +https://trends.uxdesign.cc/ +https://bolters.io/ +https://github.com/Cloud-Code-AI/AkiraDocs + +https://github.com/Vali-98/ChatterUI +https://www.nngroup.com/videos/efficient-error-messages/ +https://uxdesign.cc/a-good-design-is-a-good-essay-197f4bd31c92 +https://github.com/Rugz007/liha +https://astro.new/latest/ +https://github.com/lobehub/lobe-chat +https://www.scoutos.com/ +https://github.com/FishiaT/yawullm +https://ilikeinterfaces.com/2015/03/09/map-ui-ghost-in-the-shell/ +https://jdan.github.io/98.css +https://github.com/vercel/ai-chatbot +https://www.nngroup.com/videos/the-danger-of-defaults/ +https://writings.stephenwolfram.com/2024/12/useful-to-the-point-of-being-revolutionary-introducing-wolfram-notebook-assistant/ 
+https://en.wikipedia.org/wiki/Template:Google_payment_apps +https://github.com/stackblitz-labs/bolt.diy +https://github.com/Haervwe/open-webui-tools +https://uxdesign.cc/the-evolution-of-attention-c6154276f1b4 +https://otranscribe.com/ +https://www.dive.club/ideas/new-types-of-ui-that-generative-ai-has-necessitated +https://interactjs.io/ +https://perchance.org/ai-chat +https://uiverse.io/elements +https://uxdesign.cc/youre-trying-to-turn-an-oil-tanker-288ed30876e4 +https://uxdesign.cc/simple-recall-testing-for-b2b-ux-134e18330da9 +https://lovable.dev/ +https://markwhen.com/ +https://kando.menu/ +https://deepseek-artifacts.vercel.app/ +https://darkpatternsgame.productartistry.com/ + + +Not waifus, but clippy: + https://github.com/fghrsh/live2d_demo + + +Prompt Engineering page: +- 3 Sections + - Prompt + - Values + - Response +- Looks like Claude Workshop UI window + diff --git a/Docs/Design/VLMs.md b/Docs/Design/VLMs.md index 9926f9e6a..728ad1cb2 100644 --- a/Docs/Design/VLMs.md +++ b/Docs/Design/VLMs.md @@ -4,9 +4,61 @@ ### Link Dump: +https://github.com/jabberjabberjabber/LLMOCR/blob/main/llm-ocr-gui.py#L178 +https://colab.research.google.com/drive/1wkCIO6q8UDJQbPsu8jI_og1JAUibpp38?usp=sharing#scrollTo=5LH3vJS6SkHF +https://colab.research.google.com/drive/1bcSu_mLki11aXpbS6Fwo_WF5idJv8uId?usp=sharing +https://colab.research.google.com/drive/1E-ySj39oldXcvcsbjDYnvW-USi281Llj?usp=sharing + + https://arxiv.org/abs/2411.18279 https://github.com/breezedeus/Pix2Text https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct +https://huggingface.co/tencent/HunyuanVideo +https://huggingface.co/OpenGVLab/InternVL2_5-38B +https://ivy-lvlm.github.io/Video-MA2MBA/ +https://internvl.github.io/blog/2024-12-20-InternVL-2.5-MPO/ +https://huggingface.co/allenai/Molmo-7B-D-0924 +https://github.com/deepseek-ai/DeepSeek-VL2 +https://huggingface.co/Qwen/QVQ-72B-Preview +https://huggingface.co/Qwen/Qwen2-VL-72B +https://huggingface.co/Qwen/Qwen2-VL-7B +https://huggingface.co/Qwen/Qwen2-VL-2B +https://github.com/Lokesh-Chimakurthi/vision-rag +https://github.com/tjmlabs/ColiVara + + + +https://www.reddit.com/r/StableDiffusion/comments/1h7hunp/how_to_run_hunyuanvideo_on_a_single_24gb_vram_card/ +https://arxiv.org/abs/2412.05185 +https://huggingface.co/collections/OpenGVLab/internvl-25-673e1019b66e2218f68d7c1c +https://huggingface.co/Infinigence/Megrez-3B-Omni +https://huggingface.co/FastVideo/FastHunyuan +https://huggingface.co/papers/2412.07626 +https://huggingface.co/AI-Safeguard/Ivy-VL-llava +https://github.com/matatonic/openedai-vision +https://github.com/breezedeus/Pix2Text +https://github.com/Tencent/HunyuanVideo +https://github.com/huggingface/blog/blob/main/smolvlm.md +https://colivara.com/ +https://huggingface.co/tencent/HunyuanVideo +https://aivideo.hunyuan.tencent.com/ +https://github.com/deepseek-ai/DeepSeek-VL2 +https://arxiv.org/abs/2409.17146 +https://www.reddit.com/r/LocalLLaMA/comments/1hfkytk/answering_my_own_question_i_got_apollo_working/ +https://arxiv.org/abs/2412.09645 +https://rentry.org/crhcqq54 +https://arxiv.org/abs/2412.13501 +https://huggingface.co/IamCreateAI/Ruyi-Mini-7B +https://github.com/tdrussell/diffusion-pipe +https://proptest.ai/?#/playground +https://lyra-omni.github.io/ +Apollo + https://arxiv.org/abs/2412.10360 + https://apollo-lmms.github.io/ + https://huggingface.co/Apollo-LMMs + https://huggingface.co/manysuch-cases/Apollo-Github-Files + 
https://huggingface.co/GoodiesHere/Apollo-LMMs-Apollo-7B-t32 + https://huggingface.co/GoodiesHere/Apollo-LMMs-Apollo-3B-t32 + https://www.reddit.com/r/LocalLLaMA/comments/1hgri8g/has_apollo_disappeared/ -https://ivy-lvlm.github.io/Video-MA2MBA/ \ No newline at end of file diff --git a/Docs/Design/Visual_Video_Analysis.md b/Docs/Design/Visual_Video_Analysis.md new file mode 100644 index 000000000..09f50be28 --- /dev/null +++ b/Docs/Design/Visual_Video_Analysis.md @@ -0,0 +1,15 @@ +# Visual Video Analysis + + +## Introduction + + +## Visual Video Analysis + + +### Link Dump: +https://github.com/SamurAIGPT/AI-Youtube-Shorts-Generator +https://arxiv.org/abs/2412.19238 + + + diff --git a/Docs/Design/WebScraping.md b/Docs/Design/WebScraping.md new file mode 100644 index 000000000..45a4aab7f --- /dev/null +++ b/Docs/Design/WebScraping.md @@ -0,0 +1,11 @@ +# WebScraping Pipeline + +### Overview +Page describing design and implementation of the web scraping pipeline + + +### Flow + + +### Link Dump +https://github.com/scrapinghub/article-extraction-benchmark diff --git a/Docs/Design/WebSearch.md b/Docs/Design/WebSearch.md index 17807f572..79f0a140b 100644 --- a/Docs/Design/WebSearch.md +++ b/Docs/Design/WebSearch.md @@ -2,40 +2,342 @@ ## Introduction This page serves as documentation regarding the web search functionality within tldw and provides context/justification for the decisions made within the module. +- **High-Level Workflow** + 1. User inputs a search query. + 2. User selects a search engine (Option for default search engine + default query options in config file). + 3. User may select `Advanced Search` for additional search parameters (Language, Date Range, etc). + 4. The user presses 'Search'. + 5. Search is performed -> Results obtained, + 6. Each individual item is first analyzed based on snippet, if relevant, entire page is fetched and analyzed, this is then stored in the results dictionary, and the process is repeated until all results are analyzed/limit is hit. + 7. Once all results are collected, they are then operated on, being used to create whatever final product is desired by the user. + 8. The final product is then passed back to the UI for display to the user. -## Web Search +### Current Status +- Bing, Brave, DDG, Google work for simple searches. Advanced search options are not fully working yet. + - Brave: https://api.search.brave.com/app/documentation/web-search/query#WebSearchAPIQueryParameters + - Bing: https://docs.microsoft.com/en-us/rest/api/cognitiveservices-bingsearch/bing-web-api-v7-reference + - DuckDuckGo: https://duckduckgo.com/ + - Google: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list +- Baidu, SearX, Serper, Tavily, Yandex are not implemented yet. +- Kagi & SearX are implemented but not working (Kagi because API access and SearX because I'm not sure why) +- Parsing works for Bing, Brave, DDG, Google. +- Currently no use of Structured outputs (to-do...) +- Full Pipeline works. + 1. User enters query + Search options + 2. Query is processed, sub-queries are generated(if specified) + 3. Each query is sent to the search engine API + 4. Search results are returned + 5. Each search result is analyzed, if relevant, the full page is fetched and stored in the results dict + 6. Results are then aggregated and presented to the user +- **To Do:** + 2. Implement the saving options (Web Search DB - allow for saving search results in DB, File, Clipboard, Notes DB - allow for saving search report + citations in a note) + 2. 
User can also select which results are relevant, and which are not, and remove irrelevant results from the `web_search_results_dict` + 3. Implement the remaining search engines (Baidu, SearX, Serper, Tavily, Yandex) + 4. Implement the advanced search options + 5. Implement the structured outputs + 6. Implement the various output options (Style/Format) / Allow the user to customize the aggregation prompt + 7. Provide the user with follow-up questions + potential other questions, if these are selected, the content is added to the 'ongoing' document -### Link Dump: -https://github.com/appvoid/search -https://github.com/felladrin/MiniSearch -https://github.com/TheBlewish/Web-LLM-Assistant-Llamacpp-Ollama -https://github.com/pengfeng/ask.py -https://cookbook.openai.com/examples/third_party/web_search_with_google_api_bring_your_own_browser_tool -https://developers.google.com/custom-search/v1/overview -https://www.ignorance.ai/p/how-to-build-an-ai-search-engine-83b?publication_id=1407539 -https://www.ignorance.ai/p/how-to-build-an-ai-search-engine -https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/document_loaders/brave_search.py - Could instantiate a browser, perform a search with X engine, and then parse the results. -https://github.com/YassKhazzan/openperplex_backend_os -https://github.com/InternLM/MindSearch -https://github.com/developersdigest/llm-answer-engine +---------------- +### Setting the Stage +- **Text Search Workflow** + 1. User inputs a search query. + 2. User selects a search engine + Query options (Option for default search engine + default query options in config file). + 3. The user presses 'Search'. + - Gradio UI Function makes a call to `process_question` with the search query + search parameters as a dictionary + 4. `process_question()` checks the search params dict to see if sub-query creation is enabled, if so, it creates a list of sub-queries based on the search query with a call to `analyze_question()`. + 5. `analyze_question()` takes the search query and generates a list of sub-queries based on the search query, attempts this 3 times, making a call to the LLM API, and then returns the list of sub-queries if successful. + 6. once back in `process_question()`, all queries are combined into a single query list `#L113`. + 7. `process_question()` then iterates through the query list, making a call to `perform_websearch()` with each query in the list, and the matching search parameters. + 8. `perform_websearch()` makes a call to the selected search engine's API with the query and search parameters, and returns the results. + - This function is a `sink' for all search engine API calls, and is responsible for handling calling the appropriate search engine API call + 9. `process_web_search_results()` then takes the results from the search engine, and processes them, converting them into a dictionary of results in the `web_search_results_dict` dictionary. + - FIXME - this is where I lose track of what's happening, need to re-read the code + - This function returns a filled `web_search_results_dict` dictionary + 10. `process_question()` then takes the `web_search_results_dict` and processes it, checking to make sure it is valid and contains results. + - FIXME - Verify this is correct + 11. FIXME - Make it optional to display the results to the user, and allow them to select which results are relevant before continuing processing + 12. 
`process_question()` then iterates through each search result, checking if it is relevant, and if so, adds it to the `relevant_results_dict` + - FIXME - The results should be added back to the `web_search_results_dict` if they are relevant. + 13. `process_question()` then calls the `aggregate_results()` function with the `web_search_results_dict` + 14. `aggregate_results()` then takes the `web_search_results_dict` and processes it, combining all the results into a single document + - FIXME - This is not implemented yet and also want various options available for this. + 15. `process_question()` then returns the `web_search_results_dict` to the calling function. + 16. The calling function then takes the `web_search_results_dict` and processes it, extracting the final results/aggregated report and presenting it to the user + 17. The user then has the option to save the results to the DB, or ask follow-up questions, etc. + 18. The user can also select which results are relevant, and which are not, and remove irrelevant results from the `web_search_results_dict` + +- **Function Execution Steps:** + - `def perform_websearch(search_engine, search_query, content_country, search_lang, output_lang, result_count, date_range=None, safesearch=None, site_blacklist=None, exactTerms=None, excludeTerms=None, filter=None, geolocation=None, search_result_language=None, sort_results_by=None)` + - `search_engine` - The search engine to use for the search + - `search_query` - The query to search for + - `content_country` - The country of the content to search for + - `search_lang` - The language to use for the search + - `output_lang` - The language to use for the output + - `result_count` - The number of results to return + - `date_range` - The date range to search within + - `safesearch` - Whether to enable safe search + - `site_blacklist` - A list of sites to exclude from the search results + - `exactTerms` - Terms that must be in the search results + - `excludeTerms` - Terms that must not be in the search results + - `filter` - A filter to apply to the search results + - `geolocation` - The geolocation to use for the search + - `search_result_language` - The language to use for the search results + - `sort_results_by` - How to sort the search results + - **Returns:** A list of search results as a dictionary. - FIXME: Define the structure of the dictionary + - Each result should contain the title, URL, content, and metadata of the search result.
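As a self-contained illustration of steps 12-14, the sketch below filters a results dictionary for relevance and aggregates the survivors into a Markdown report. `is_relevant()` and this `aggregate_results()` are simplified stand-ins for the project's functions, and the dictionary layout follows the `web_search_results_dict` structure shown under Implementation below.

```python
from typing import Any, Dict

def is_relevant(query: str, result: Dict[str, Any]) -> bool:
    # Placeholder heuristic; the real pipeline would ask an LLM instead.
    content = (result.get("content") or "").lower()
    return any(tok.lower() in content for tok in query.split())

def aggregate_results(query: str, web_search_results_dict: Dict[str, Any]) -> str:
    relevant = [r for r in web_search_results_dict["results"] if is_relevant(query, r)]
    lines = [f"# Report for: {query}", ""]
    for i, r in enumerate(relevant, 1):
        snippet = r.get("metadata", {}).get("snippet", "")
        lines.append(f"{i}. [{r['title']}]({r['url']})")
        lines.append(f"   {snippet}")
    return "\n".join(lines)

sample = {"results": [{"title": "ETL basics", "url": "https://example.com",
                       "content": "An overview of ETL pipelines.",
                       "metadata": {"snippet": "ETL overview"}}]}
print(aggregate_results("ETL pipelines", sample))
```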
+ + + +---------------- +### Web Search API +- TBD +``` +def perform_websearch(search_engine, + search_query, + content_country, + search_lang, + output_lang, + result_count,  # Number of results to return (not applicable to all engines, but necessary for consistency) + date_range=None, + safesearch=None, + site_blacklist=None, + exactTerms=None, + excludeTerms=None, + filter=None, + geolocation=None, + search_result_language=None, + sort_results_by=None): +``` + + + +### Implementation + +config.txt options explained: +``` +# Search Defaults +search_provider_default = google +search_language_query = en - Language queries will be performed in +search_language_results = en - Language results will be returned in +search_language_analysis = en - Language analysis will be performed in +search_default_max_queries = 10 - Maximum number of queries to perform +search_enable_subquery = True - Enable subqueries +search_enable_subquery_count_max = 5 - Maximum number of subqueries to generate +search_result_rerank = True - Enable result reranking +search_result_max = 15 - Maximum number of results to return +search_result_max_per_query = 10 - Maximum number of results to return per query +search_result_blacklist = [] +search_result_display_type = list - Display type for search results; does nothing right now. +search_result_display_metadata = False - Display metadata for search results; does nothing right now. +search_result_save_to_db = True - Save search results to the database (not implemented yet) +# How you want the results to be written, think 'style' or voice +search_result_analysis_tone = neutral - Tone of the analysis (not implemented yet) +relevance_analysis_llm = openai - LLM to use for relevance analysis +final_answer_llm = openai - LLM to use for final answer generation +#### Search Engines #### +# Bing +search_engine_country_code_bing = en - Country code for Bing; where the search 'takes place from' +# +# Brave +search_engine_country_code_brave = US - Country code for Brave; where the search 'takes place from' +# +# Google +# Restricts search results to documents originating in a particular country. 
+limit_google_search_to_country = False +google_search_country_code = US - Country code for Google, Where Search 'takes place from' +google_filter_setting = 1 - Filter setting for Google, 0 = No filtering, 1 = Moderate filtering, 2 = Strict filtering +google_user_geolocation = US - Geolocation for user performing the search +google_limit_search_results_to_language = False - Limit search results to a specific language +google_default_search_results = 10 - Default number of search results to return +google_safe_search = "active" - Safe search setting for Google, active, moderate, or off +google_enable_site_search = False - Enable site search +google_site_search_include = - Sites to include in the search +google_site_search_exclude = - Sites to exclude from the search +# https://developers.google.com/custom-search/docs/structured_search#sort-by-attribute +google_sort_results_by = - Sort results by attribute (I honestly couldn't find much about this one) +``` + +Results dictionary: +``` +web_search_results_dict = { + "search_engine": search_engine, + "search_query": search_results.get("search_query", ""), + "content_country": search_results.get("content_country", ""), + "search_lang": search_results.get("search_lang", ""), + "output_lang": search_results.get("output_lang", ""), + "result_count": search_results.get("result_count", 0), + "date_range": search_results.get("date_range", None), + "safesearch": search_results.get("safesearch", None), + "site_blacklist": search_results.get("site_blacklist", None), + "exactTerms": search_results.get("exactTerms", None), + "excludeTerms": search_results.get("excludeTerms", None), + "filter": search_results.get("filter", None), + "geolocation": search_results.get("geolocation", None), + "search_result_language": search_results.get("search_result_language", None), + "sort_results_by": search_results.get("sort_results_by", None), + "results": [ + { + "title": str, + "url": str, + "content": str, + "metadata": { + "date_published": Optional[str], + "author": Optional[str], + "source": Optional[str], + "language": Optional[str], + "relevance_score": Optional[float], + "snippet": Optional[str] + } + }, + ], + "total_results_found": search_results.get("total_results_found", 0), + "search_time": search_results.get("search_time", 0.0), + "error": search_results.get("error", None), + "processing_error": None +} +``` + +---------------- ### Search Engines -- **Google Search** - - [Google Search API]( FIXME ) - - -- **Bing Search** - - [Bing Search API]( FIXME ) - - -- **Yandex Search** - - [Yandex Search API](https://yandex.com/dev/search/) - - -- **Baidu Search** - - [Baidu Search API](https://www.baidu.com/) - - -- **Searx Search** - - [Searx Search API](https://searx.github.io/searx/) - - + +#### Baidu Search +- [Baidu Search API](https://www.baidu.com/) +- Baidu doens't have an official english API, so we'll have to scrape the results or use Serper + + +#### Bing Search +- [Bing Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) +- Getting Started with Bing Search API + - Sign up for a Bing Search API key via Azure (1000 Free Searches a month) - https://www.microsoft.com/en-us/bing/apis/pricing + - Use the Bing Search API to perform searches - Add the generated subscription key to your config.txt file. + - If for some reason you're doing modifications to the code(Fuck MS), be aware: https://github.com/Azure-Samples/cognitive-services-REST-api-samples/issues/139 + - Perform searches using Bing! 
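+
+As a rough illustration of the Bing setup above, the whole call is a single GET against the standard Bing Web Search v7 endpoint with the subscription key passed in a header (sketch only; the default parameter values below are assumptions, not values pulled from config.txt):
+```
+import requests
+
+def search_bing(query, api_key, count=10, mkt="en-US", safesearch="Moderate"):
+    # Bing Web Search v7: the subscription key goes in the Ocp-Apim-Subscription-Key header
+    response = requests.get(
+        "https://api.bing.microsoft.com/v7.0/search",
+        headers={"Ocp-Apim-Subscription-Key": api_key},
+        params={"q": query, "count": count, "mkt": mkt, "safeSearch": safesearch},
+        timeout=30,
+    )
+    response.raise_for_status()
+    # Organic results live under webPages.value; each entry has name/url/snippet
+    return response.json().get("webPages", {}).get("value", [])
+```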
+ + +#### Brave Search +Two APIs, 1 for 'AI' the other for 'regular' search + - [Brave Search API](https://brave.com/search/api/) + + +#### DuckDuckGo Search +Uses query to direct DDG search, then scrape the results. +Structure/approach taken from https://github.com/deedy5/duckduckgo_search + + +#### Google Search +- [Google Search API](https://developers.google.com/custom-search/v1/overview) +- Have to create a custom search engine first, get the ID and then the API key +- Setup: + - Setup a `Programmable Search Engine` + - Get the `API Key` + - 100 Search queries per day for free +- Documentation for making requests: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list + + +#### Kagi Search +- [Kagi Search API](https://help.kagi.com/kagi/api/search.html) +- Really straightforward, just a simple search API + + +#### SearX Search +- [Searx Search Documentation](https://searx.github.io/searx/) +- `SearXNG is a free internet metasearch engine which aggregates results from more than 70 search services. Users are neither tracked nor profiled.` +- Can host your own instance or use someone else's. + + +#### Serper Search + + +#### Tavily Search + + +#### Yandex Search +- https://yandex.cloud/en/docs/search-api/quickstart/ +- [Yandex Search API](https://yandex.com/dev/search/) + +#### Arxiv Search + + +#### PubMedCentral Search +- https://www.ncbi.nlm.nih.gov/home/develop/api/ + + + +### Prompts used: + +sub_question_generation_prompt = +``` +You are an AI assistant that helps generate search queries. Given an original query, suggest alternative search queries that could help find relevant information. Your goal is to generate queries that are diverse, specific, and highly relevant to the original query, ensuring comprehensive coverage of the topic. + Important instructions: + 1. Generate between 2 and 6 queries unless a fixed count is specified. Generate more queries for complex or multifaceted topics and fewer for simple or straightforward ones. + 2. Ensure the queries are diverse, covering different aspects or perspectives of the original query, while remaining highly relevant to its core intent. + 3. Prefer specific queries over general ones, as they are more likely to yield targeted and useful results. + 4. If the query involves comparing two topics, generate separate queries for each topic. + 5. If previous queries and an answer are provided, generate new queries that address the shortcomings of the previous answer and avoid repeating the previous queries. + 6. If the original query is broad or ambiguous, generate queries that explore specific subtopics or clarify the intent. + 7. If the query is too specific or unclear, generate queries that explore related or broader topics to ensure useful results. + 8. Return the queries as a JSON array in the format ["query_1", "query_2", ...]. + Examples: + 1. For the query "What are the benefits of exercise?", generate queries like: + ["health benefits of physical activity", "mental health benefits of exercise", "long-term effects of regular exercise", "how exercise improves cardiovascular health", "role of exercise in weight management"] + 2. For the query "Compare Python and JavaScript", generate queries like: + ["key features of Python programming language", "advantages of JavaScript for web development", "use cases for Python vs JavaScript", "performance comparison of Python and JavaScript", "ease of learning Python vs JavaScript"] + 3. 
For the query "How does climate change affect biodiversity?", generate queries like: + ["impact of climate change on species extinction", "effects of global warming on ecosystems", "role of climate change in habitat loss", "how rising temperatures affect marine biodiversity", "climate change and its impact on migratory patterns"] + 4. For the query "Best practices for remote work", generate queries like: + ["tips for staying productive while working from home", "how to maintain work-life balance in remote work", "tools for effective remote team collaboration", "managing communication in remote teams", "ergonomic setup for home offices"] + 5. For the query "What is quantum computing?", generate queries like: + ["basic principles of quantum computing", "applications of quantum computing in real-world problems", "difference between classical and quantum computing", "key challenges in developing quantum computers", "future prospects of quantum computing"] + Original query: {original_query} +# +search_result_relevance_eval_prompt = Given the following search results for the user's question: "{original_question}" and the generated sub-questions: {sub_questions}, evaluate the relevance of the search result to the user's question. + Explain your reasoning for selection. + Search Results: + {content} + Instructions: + 1. You MUST only answer TRUE or False while providing your reasoning for your answer. + 2. A result is relevant if the result most likely contains comprehensive and relevant information to answer the user's question. + 3. Provide a brief reason for selection. + You MUST respond using EXACTLY this format and nothing else: + Selected Answer: [True or False] + Reasoning: [Your reasoning for the selections] + +``` + +analyze_search_results_prompt = +``` +Generate a comprehensive, well-structured, and informative answer for a given question, + using ONLY the information found in the provided web Search Results (URL, Page Title, Summary). + Use an unbiased, journalistic tone, adapting the level of formality to match the user’s question. + • Cite your statements using [number] notation, placing citations at the end of the relevant sentence. + • Only cite the most relevant results. If multiple sources support the same point, cite all relevant sources [e.g., 1, 2, 3]. + • If sources conflict, present both perspectives clearly and cite the respective sources. + • If different sources refer to different entities with the same name, provide separate answers. + • Do not add any external or fabricated information. + • Do not include URLs or a reference section; cite inline with [number] format only. + • Do not repeat the question or include unnecessary redundancy. + • Use markdown formatting (e.g., **bold**, bullet points, ## headings) to organize the information. + • If the provided results are insufficient to answer the question, explicitly state what information is missing or unclear. + Structure your answer like this: + 1. **Short introduction**: Briefly summarize the topic (1–2 sentences). + 2. **Bulleted points**: Present key details, each with appropriate citations. + 3. **Conclusion**: Summarize the findings or restate the core answer (with citations if needed). + Example: + 1. **Short introduction**: This topic explores the impact of climate change on agriculture. + 2. **Bulleted points**: + - Rising temperatures have reduced crop yields in some regions [1]. + - Changes in rainfall patterns are affecting irrigation practices [2, 3]. + 3. 
**Conclusion**: Climate change poses significant challenges to global agriculture [1, 2, 3]. + + {concatenated_texts} + + --------------------- + Make sure to match the language of the user's question. + Question: {question} + Answer (in the language of the user's question): +``` diff --git a/Docs/Handy_Dandy_Papers.md b/Docs/Handy_Dandy_Papers.md index 77316301b..dea511fe7 100644 --- a/Docs/Handy_Dandy_Papers.md +++ b/Docs/Handy_Dandy_Papers.md @@ -4,18 +4,52 @@ https://arxiv.org/pdf/2410.13098 https://arxiv.org/pdf/2404.01413 https://arxiv.org/abs/2411.11910 - +https://arxiv.org/abs/2411.00136 +https://jinyangwu.github.io/hiar-icl/ +https://www.aimodels.fyi/papers/arxiv/beyond-examples-high-level-automated-reasoning-paradigm +https://arxiv.org/abs/2412.02674 +https://arxiv.org/abs/2411.19865 +https://arxiv.org/abs/2412.04315 +https://arxiv.org/abs/2412.02980 +https://arxiv.org/abs/2412.02906 +https://arxiv.org/pdf/2410.21272 +https://arxiv.org/abs/2412.04318 +https://arxiv.org/abs/2412.02142 +https://arxiv.org/abs/2412.05265 +https://arxiv.org/abs/2412.05346 ### Context https://arxiv.org/pdf/2304.12102 https://arxiv.org/html/2405.20234v2#S3 +### Dataset Generation + https://arxiv.org/abs/2412.04645 + + +### Jailbreaking + https://arxiv.org/abs/2411.01084 + https://arxiv.org/abs/2408.04811 + https://arxiv.org/html/2412.05346v1 + https://github.com/centerforaisafety/HarmBench + https://github.com/dsbowen/strong_reject + ### Reasoning - https://huggingface.co/papers/2411.15862 +https://arxiv.org/abs/2411.19865 +https://arxiv.org/abs/2412.06769 +- https://arxiv.org/abs/2412.01113 +https://huggingface.co/spaces/HuggingFaceH4/blogpost-scaling-test-time-compute +- https://machinelearning.apple.com/research/gsm-symbolic +https://arxiv.org/abs/2402.10200 +- ### Test-Time Compute - https://github.com/GAIR-NLP/O1-Journey - +- https://arxiv.org/abs/2408.03314 +- https://github.com/huggingface/search-and-learn +- https://huggingface.co/spaces/HuggingFaceH4/blogpost-scaling-test-time-compute +https://arxiv.org/abs/2412.18319 +- https://github.com/hkust-nlp/B-STaR ### Personalization https://arxiv.org/abs/2411.16034 @@ -27,6 +61,11 @@ https://arxiv.org/abs/2411.11910 ### RAG https://arxiv.org/html/2407.21059v1 + +### RWKV + https://arxiv.org/abs/2412.14847 + + ### Sampling https://arxiv.org/abs/2411.09661 @@ -38,6 +77,9 @@ https://arxiv.org/abs/2411.11910 ### Structured Generation https://arxiv.org/abs/2411.15100 +### Synthetic data creation +https://arxiv.org/abs/2412.14689 + ### Test-Time related https://mathcritique.github.io/ diff --git a/Docs/Issues/Citations_and_Confabulations.md b/Docs/Issues/Citations_and_Confabulations.md index d2ecd5c7f..0a0de9615 100644 --- a/Docs/Issues/Citations_and_Confabulations.md +++ b/Docs/Issues/Citations_and_Confabulations.md @@ -5,8 +5,22 @@ 2. [Confabulations](#confabulations) 3. 
[References](#references) + RAG https://www.lycee.ai/blog/rag-ragallucinations-and-how-to-fight-them + https://huggingface.co/PleIAs/Pleias-Nano + https://arxiv.org/abs/2412.11536 + https://cloud.google.com/generative-ai-app-builder/docs/check-grounding + https://cloud.google.com/generative-ai-app-builder/docs/grounded-gen + https://arxiv.org/html/2412.15189v1#S6 + https://aclanthology.org/2024.fever-1.10/ + https://arxiv.org/pdf/2412.15189 + https://huggingface.co/papers/2408.12060 + https://primer.ai/research/rag-v-divide-and-conquer-with-factual-claims/ + https://arxiv.org/abs/2411.06037 + https://www.sciencedirect.com/science/article/abs/pii/S0306457320309675 + https://github.com/Huffon/factsumm + https://arxiv.org/abs/2410.07176 Finetuning: - https://eugeneyan.com/writing/finetuning/ @@ -17,8 +31,13 @@ Finetuning: - **101** - Unsorted - https://mattyyeung.github.io/deterministic-quoting#7-conclusion-is-this-really-ready-for-healthcare + https://github.com/sunnynexus/RetroLLM + - https://github.com/MadryLab/context-cite - +Abstractive Proposition Segmentation + https://arxiv.org/abs/2406.19803 + https://huggingface.co/google/gemma-2b-aps-it + https://ritvik19.medium.com/papers-explained-244-gemma-aps-8fac1838b9ef Anthropic: ``` @@ -58,6 +77,12 @@ Benchmarks https://huggingface.co/spaces/vectara/Hallucination-evaluation-leaderboard https://huggingface.co/spaces/hallucinations-leaderboard/leaderboard https://osu-nlp-group.github.io/AttributionBench/ + Fake News + https://arxiv.org/abs/2412.14686 + FACTS + https://www.kaggle.com/facts-leaderboard + https://storage.googleapis.com/deepmind-media/FACTS/FACTS_grounding_paper.pdf + https://deepmind.google/discover/blog/facts-grounding-a-new-benchmark-for-evaluating-the-factuality-of-large-language-models/ Detecting Hallucinations using Semantic Entropy: @@ -80,38 +105,52 @@ Explainability Research https://github.com/EdinburghNLP/awesome-hallucination-detection - https://arxiv.org/abs/2407.13481 - https://arxiv.org/abs/2408.06195 - https://arxiv.org/abs/2407.19813 https://www.lycee.ai/blog/rag-ragallucinations-and-how-to-fight-them - https://arxiv.org/abs/2407.16557 - https://arxiv.org/abs/2407.16604 - https://arxiv.org/pdf/2309.11495 - https://deepmind.google/research/publications/85420/ https://thetechoasis.beehiiv.com/p/eliminating-hallucinations-robots-imitate-us - https://arxiv.org/abs/2407.19825 - https://arxiv.org/abs/2411.14257 - https://arxiv.org/pdf/2406.02543 - https://arxiv.org/pdf/2410.19385 - https://arxiv.org/abs/2406.10279 - https://arxiv.org/abs/2402.17811 - https://arxiv.org/pdf/2409.18475 https://llm-editing.github.io/ - https://arxiv.org/abs/2411.14257 - https://arxiv.org/pdf/2407.03651 https://cleanlab.ai/blog/trustworthy-language-model/ - https://arxiv.org/abs/2408.07852 + General + https://arxiv.org/pdf/2410.19385 + https://arxiv.org/pdf/2409.18475 + https://arxiv.org/pdf/2406.02543 + https://arxiv.org/abs/2407.19825 + https://arxiv.org/abs/2407.16604 + https://arxiv.org/abs/2407.16557 + https://arxiv.org/abs/2412.04235 + Attention/Long Context + https://arxiv.org/abs/2407.13481 + https://arxiv.org/pdf/2407.03651 + CoV + https://arxiv.org/pdf/2309.11495 + KnowledgeGraph + https://arxiv.org/abs/2408.07852 + Mutual Reasoning + https://arxiv.org/abs/2408.06195 + Self-Reasoning + https://arxiv.org/abs/2407.19813 + https://arxiv.org/abs/2412.14860 Detecting Hallucinations https://arxiv.org/abs/2410.22071 https://arxiv.org/abs/2410.02707 + https://arxiv.org/abs/2411.14257 Reflective thinking 
https://arxiv.org/html/2404.09129v1 https://github.com/yanhong-lbh/LLM-SelfReflection-Eval Semantic Entropy https://www.nature.com/articles/s41586-024-07421-0 https://arxiv.org/abs/2406.15927 + Software Packages + https://arxiv.org/abs/2406.10279 + TruthX + https://arxiv.org/abs/2402.17811 + Working memory + https://arxiv.org/abs/2412.18069 HALVA https://research.google/blog/halva-hallucination-attenuated-language-and-vision-assistant/ +Long Form Factuality - Google + https://github.com/google-deepmind/long-form-factuality + https://deepmind.google/research/publications/85420/ + LLM As Judge: diff --git a/Docs/Issues/Evaluation_Plans.md b/Docs/Issues/Evaluation_Plans.md index 3cce16710..4a76b6a63 100644 --- a/Docs/Issues/Evaluation_Plans.md +++ b/Docs/Issues/Evaluation_Plans.md @@ -14,38 +14,91 @@ ---------------------------------------------------------------------------------------------------------------- -https://x.com/bnjmn_marie/status/1846834917608407199 -https://www.juriopitz.com/2024/10/17/evaluation-pitfalls-metric-overview-tips.html + +LightEval + Argilla + distilabel +- Open source, (will) support litellm and can use distilabel for synth data gen + + +https://huggingface.co/papers/2412.06745 +https://huggingface.co/dranger003/c4ai-command-r-v01-iMat.GGUF +https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-DPO +https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-SFT-no-safety-data +https://huggingface.co/mradermacher/Llama-3.1-Tulu-3-8B-SFT-no-safety-data-GGUF +https://arxiv.org/html/2412.02611v1 +https://arxiv.org/abs/2412.05579 +https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d +https://huggingface.co/blog/synthetic-data-generator +https://towardsdatascience.com/stop-guessing-and-measure-your-rag-system-to-drive-real-improvements-bfc03f29ede3 +https://huggingface.co/SultanR/SmolTulu-1.7b-Instruct +https://huggingface.co/DevQuasar/allenai.Llama-3.1-Tulu-3-8B-SFT-no-safety-data-GGUF +https://huggingface.co/CohereForAI/c4ai-command-r7b-12-2024 +https://arxiv.org/html/2412.09569v1 +https://huggingface.co/tiiuae +https://github.com/naver/bergen?tab=readme-ov-file +https://arxiv.org/abs/2412.13147 +https://arxiv.org/abs/2412.13018 +https://huggingface.co/blog/big-bench-audio-release +https://github.com/chigkim/openai-api-gpqa +https://github.com/chigkim/Ollama-MMLU-Pro +https://huggingface.co/ymcki/Llama-3_1-Nemotron-51B-Instruct-GGUF +https://pub.towardsai.net/streamline-your-llm-evaluation-a-step-by-step-guide-to-rag-metrics-with-streamlit-38ed9efbdc9a +https://huggingface.co/QuantFactory/granite-3.1-8b-instruct-GGUF +https://huggingface.co/CohereForAI/c4ai-command-r7b-12-2024 +https://arxiv.org/abs/2412.17758 +https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard +https://www.atla-ai.com/post/evaluating-the-evaluator +https://hamel.dev/blog/posts/llm-judge/ +https://github.com/scrapinghub/article-extraction-benchmark +https://github.com/Zhe-Young/SelfCorrectDecompose +https://eugeneyan.com/writing/evals/ + +Have LLMs play Social deception games +Different results depending on batch size during evaluations - https://x.com/bnjmn_marie/status/1846834917608407199 + Benchmarking with distilabel https://distilabel.argilla.io/latest/sections/pipeline_samples/examples/benchmarking_with_distilabel/ General Research - https://arxiv.org/abs/2407.10457 - https://arxiv.org/abs/2410.01392 - https://arxiv.org/pdf/2411.09213 - https://arxiv.org/abs/2411.00640 - 
https://ai.gopubby.com/5-ml-techniques-to-boost-your-model-accuracy-without-adding-more-data-94413189aaad?source=collection_home---4------9----------------------- - https://arxiv.org/abs/2411.10541 - https://github.com/DanielWarfield1/MLWritingAndResearch/blob/main/AutoMix.ipynb - + Greedy Sampling + https://arxiv.org/abs/2407.10457 + * `Our study addresses this issue by exploring key questions about the performance differences between greedy decoding and sampling, identifying benchmarks' consistency regarding non-determinism, and examining unique model behaviors. Through extensive experiments, we observe that greedy decoding generally outperforms sampling methods for most evaluated tasks. We also observe consistent performance across different LLM sizes and alignment methods, noting that alignment can reduce sampling variance. Moreover, our best-of-N sampling approach demonstrates that smaller LLMs can match or surpass larger models such as GPT-4-Turbo, highlighting the untapped potential of smaller LLMs. ` + Stats + https://arxiv.org/pdf/2410.01392 + https://arxiv.org/abs/2411.00640 + Chat arena - https://github.com/lm-sys/FastChat + Building one + https://github.com/lm-sys/FastChat + https://github.com/Teachings/llm_tools_benchmark + https://github.com/Nutlope/codearena + Potential issues with creating a chatarena system + https://arxiv.org/abs/2412.04363 LLM-as-judge - https://huggingface.co/spaces/AtlaAI/judge-arena - https://huggingface.co/learn/cookbook/en/llm_judge - https://github.com/open-compass/CompassJudger - https://hamel.dev/blog/posts/llm-judge - https://llm-as-a-judge.github.io/ - https://github.com/llm-as-a-judge/Awesome-LLM-as-a-judge - https://deepmind.google/research/publications/85420/ - https://arxiv.org/abs/2411.16646 + https://arxiv.org/html/2412.14140v1 + Basics + https://huggingface.co/learn/cookbook/en/llm_judge + Evaluating LLMs as Judges + https://huggingface.co/papers/2306.05685 + https://llm-as-a-judge.github.io/ + https://arxiv.org/abs/2411.16646 + Google SAFE + https://arxiv.org/abs/2403.18802 + https://github.com/google-deepmind/long-form-factuality + Ranking of + https://huggingface.co/spaces/AtlaAI/judge-arena + Tools + https://github.com/open-compass/CompassJudger Quant Eval -https://arxiv.org/abs/2411.02355 + https://arxiv.org/abs/2411.02355 +Summarization + ClinicSum (Finetuning for Summarization) + https://arxiv.org/abs/2412.04254 Creating Datasets - https://github.com/Kiln-AI/Kiln + + https://github.com/argilla-io/argilla https://www.youtube.com/watch?v=ZsCqrAhzkFU https://www.youtube.com/watch?v=jWrtgf2w4VU @@ -67,6 +120,10 @@ Finetuning - **101** https://hamel.dev/blog/posts/evals/ + +Links: + https://www.juriopitz.com/2024/10/17/evaluation-pitfalls-metric-overview-tips.html + ---------------------------------------------------------------------------------------------------------------- @@ -155,7 +212,10 @@ Finetuning - https://github.com/RUC-NLPIR/FlashRAG - Olmes - https://github.com/allenai/olmes - +- **Books** + - https://novelqa.github.io/ +- **CheeseBench** + - https://gist.github.com/av/db14a1f040f46dfb75e48451f4f14847 - **Citations** - L-CiteEval - https://huggingface.co/papers/2410.02115 @@ -166,8 +226,13 @@ Finetuning - CodeMMLU - https://arxiv.org/abs/2410.01999 - https://github.com/FSoft-AI4Code/CodeMMLU + - LiveBench + - https://github.com/LiveBench/LiveBench - StackUnseen - https://prollm.toqan.ai/leaderboard/stack-unseen + - 
https://huggingface.co/papers/2412.05210 +- **Cognitive Biases** + - CBEval: https://arxiv.org/abs/2412.03605 - **Confabulation-Rate** - https://arxiv.org/abs/2409.11353 - https://github.com/sylinrl/TruthfulQA @@ -193,15 +258,21 @@ Finetuning - HelloBench - https://github.com/Quehry/HelloBench - https://arxiv.org/abs/2409.16191 + - https://longbench2.github.io/ - https://github.com/jonathan-roberts1/needle-threading/ + - Michelangelo + - https://arxiv.org/abs/2409.12640 - **Creative Writing** - EQ Bench - https://eqbench.com/creative_writing.html - **Culture** - https://arxiv.org/html/2305.14328v2 + - https://arxiv.org/abs/2412.03304 + - https://huggingface.co/datasets/CohereForAI/Global-MMLU - https://arxiv.org/abs/2411.06032 - https://arxiv.org/abs/2410.02677 - https://mbzuai-oryx.github.io/ALM-Bench/ + - https://arxiv.org/abs/2411.19799 - User-Centric Evaluation of LLMs - https://github.com/Alice1998/URS - https://huggingface.co/spaces/HuggingFaceFW/blogpost-fine-tasks @@ -212,6 +283,10 @@ Finetuning - **Math Eval** - https://arxiv.org/abs/2411.04872 - GSM8K +- **Prompt Formatting** + - How do LLMs handle different formats: + - https://arxiv.org/abs/2411.10541 + - `Our study reveals that the way prompts are formatted significantly impacts GPT-based models’ performance, with no single format excelling universally. This finding questions current evaluation methods that often ignore prompt structure, potentially misjudging a model’s true abilities. We advocate for diverse prompt formats in future LLM testing to accurately gauge and enhance their performance.` - **Positional Bias** - https://arxiv.org/abs/2410.14641 - https://github.com/Rachum-thu/LongPiBench @@ -321,6 +396,8 @@ Finetuning - https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/summarization_score/ - https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/noise_sensitivity/ - https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/agents/#topic_adherence +- **Temporal Bias** + - https://arxiv.org/abs/2412.13377 - **Text Comprehension** - **QA (General)** * https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00276/43518/Natural-Questions-A-Benchmark-for-Question @@ -424,8 +501,16 @@ https://github.com/jonathan-roberts1/needle-threading/ https://huggingface.co/datasets/jonathan-roberts1/needle-threading https://arxiv.org/abs/2411.03538 https://arxiv.org/abs/2411.19710 - +https://aws.amazon.com/blogs/aws/new-rag-evaluation-and-llm-as-a-judge-capabilities-in-amazon-bedrock/ https://archive.is/MZsB9 +https://arxiv.org/abs/2411.00136 +https://github.com/opendatalab/OHR-Bench +https://towardsdatascience.com/from-retrieval-to-intelligence-exploring-rag-agent-rag-and-evaluation-with-trulens-3c518af836ce +https://arxiv.org/pdf/2411.09213 +https://huggingface.co/learn/cookbook/en/rag_evaluation + + + - **101** - **RAG Eval Plan:** - The generic idea however: you take a (full, unchunked) document and ask an LLM to generate a question with that document as well as give the factual answer to it. Enforce via prompts to make it use the document only and make it as hard as you want (eg. maybe sometimes you want it to consider 2 documents and make a question that uses bits of both). This gives you a ground truth dataset. 
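+
+A minimal sketch of that ground-truth generation step (the `ask_llm` callable and the JSON output contract are illustrative assumptions, not project code):
+```
+import json
+
+QA_GEN_PROMPT = """Using ONLY the document below, write one question that can be answered
+from the document, plus the factual answer. Respond as JSON:
+{{"question": "...", "answer": "..."}}
+
+Document:
+{document}"""
+
+def build_ground_truth(documents, ask_llm):
+    # documents: list of full, unchunked texts; ask_llm: fn(prompt) -> str
+    dataset = []
+    for doc in documents:
+        reply = ask_llm(QA_GEN_PROMPT.format(document=doc))
+        pair = json.loads(reply)           # {"question": ..., "answer": ...}
+        pair["source_document"] = doc      # keep the grounding text for later scoring
+        dataset.append(pair)
+    return dataset
+```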
diff --git a/Docs/Issues/ISSUES.md b/Docs/Issues/ISSUES.md index 993229d6f..54a82e325 100644 --- a/Docs/Issues/ISSUES.md +++ b/Docs/Issues/ISSUES.md @@ -122,3 +122,46 @@ Jailbreaks https://arxiv.org/pdf/2409.11445 + + + + + +### Model Routing +https://github.com/pepijndevos/llama_multiserver/tree/main + + +I trained one router LoRA capable of routing into a list of LoRAs I had previously trained on my domain data. One of the LoRAs I used was created by another user on Hugging Face but performed exceptionally well for my use case. + +Given a question, the classifier LoRA first classifies the question into one of my domains. Then another LoRA, based on the domain classification and the list of available LoRA models (fetched from my database), selects the most appropriate one for the answer. The message, along with any relevant context, is then sent to the selected LoRA. The final answers are aggregated and presented to the user. + +I have implemented multiple approaches. In one, the domain selector and router LoRA were trained and integrated into a single unitary adapter instead of relying on a two-step process. + +Additionally, in another experiment, I used a CoT LoRA. For each step, the classifier identified the domain and selected the adapter to solve each of the given steps independently. The process involved generating content from each of the "agents," aggregating the content and feeding it to the next step, aggregating the final answer, and presenting it to the user. + +I usually trained these adapters with 500–5000 samples. Most samples were generated using GPT-4o with few-shot prompts and domain-specific information to build the synthetic dataset. In my case this approach was effective in producing a tailored synthetic dataset. + +Weak-Abbreviations15 +You definitely can. +My approach is this: + + 1. High context length (2048-4096) + + 2. I get my team to produce some (50 max) high-quality chains on the domains we work on. + + 3. Use GPT to expand and optimize these chains. The output is the OptimalChain. + + 4. Use the OptimalChains and few-shot prompting to generate the remaining chains in the domains we need, based on the given structure. + + 5. Structuring chains in a JSON-like format does seem to help, but I don't have any proof of this claim. + + 6. Experiment with LoRA + Unsloth tuning with different parameters. You can use LlamaFactory to do this. + + 7. This seemed to be enough for most use cases. I'm experimenting now with adding an RLHF/OPRO post-finetuning step, but I've yet to try it. (I've built the dataset by synthetically messing up the answers to create the preferred and non-preferred answers for the process.) + +EDIT: 8. Anecdotal advice: unquantized LoRA finetuning seems to train faster, and loss also converges faster. This is my preferred approach. + +I've had my fair share of experience in finetuning. My MSc thesis was based on adding new copied layers to an LLM and then finetuning those layers specifically with a dataset for a new language the LLM knew very little about, aggressively training the new layers without changing any of the other LLM structures. The result was that the LLM gained new linguistic abilities without downgrading its base skills in English. So I believe that for most use cases finetuning is good enough. + + +In my paper I used instruct models. Later I discovered that non-instruction-tuned models seem to perform much better for finetuning. They converge more easily and in my experience seem less prone to forgetting. 
I used QLoRA, but only because the models I used were bigger than my VRAM. I also wasn't aware of the faster training of LoRA vs QLoRA. Most of my insights were developed once I was deep into the paper. I started the paper with Llama 7B and ended up using Llama 3.1 8B until I finalized the project. Using LlamaPro I added a number of layers, which increased its size to something between 9.5 and 10.2 billion parameters (I can't recall exactly), which made it larger than what fit on my GPU. I used fairly large r and alpha for LoRA on the first pass, and then lowered them as the tuning progressed. I wanted to use a very large dataset, but ended up using a much smaller one due to time and compute constraints. Everything was done locally, hence I had to fidget around and over-optimize. Probably with more compute it'd be better managed. The result was pretty good considering the base model produced only gibberish. There were some issues with some very peculiar features of my native language. I used BLEU-4 and a couple of the ROUGE metrics to test the quality of the outputs. Differences in performance were tested for significance. diff --git a/Docs/Issues/ToDo.md b/Docs/Issues/ToDo.md index f43519309..9b363380b 100644 --- a/Docs/Issues/ToDo.md +++ b/Docs/Issues/ToDo.md @@ -37,3 +37,110 @@ List of stuff I don't feel like filing an issue for (yet). https://ai.gopubby.com/demystifying-pdf-parsing-04-ocr-free-large-multimodal-model-based-method-0fdab50db048 https://pub.towardsai.net/demystifying-pdf-parsing-05-unifying-separate-tasks-into-a-small-model-d3739db021f7 https://ai.gopubby.com/demystifying-pdf-parsing-06-representative-industry-solutions-5d4a1cfe311b + +- Prompts + - https://github.com/ProfSynapse/Synapse_CoR + + +Links to sort +https://github.com/dylanashley/story-distiller +https://ieeexplore.ieee.org/document/10734853 +https://towardsdatascience.com/agentic-chunking-for-rags-091beccd94b1 +https://www.youtube.com/watch?v=NVp9jiMDdXc + +https://huggingface.co/NeuML/pubmedbert-base-embeddings +https://huggingface.co/datasets/qiaojin/PubMedQA +https://huggingface.co/datasets/armanc/scientific_papers +https://www.notion.com/blog/how-we-sped-up-notion-in-the-browser-with-wasm-sqlite +https://alexgarcia.xyz/blog/2024/sqlite-vec-hybrid-search/index.html +https://needle-threading.github.io/ +https://vitron-llm.github.io/ +https://huggingface.co/papers/2411.04335 +https://github.com/severian42/Cascade-of-Semantically-Integrated-Layers +https://github.com/THUDM/LongReward +RAG +https://towardsdatascience.com/improve-your-rag-context-recall-by-40-with-an-adapted-embedding-model-5d4a8f583f32 +https://towardsdatascience.com/beyond-na%C3%AFve-rag-advanced-techniques-for-building-smarter-and-reliable-ai-systems-c4fbcf8718b8 +https://ai.gopubby.com/rag-on-steroids-self-rag-69505f9d8238 +https://github.com/dongguanting/DPA-RAG +https://arxiv.org/html/2411.02959v1 +https://arxiv.org/pdf/2409.05591 + +UI +https://ui.shadcn.com/docs +https://www.typingmind.com/ + +ASR +https://github.com/revdotcom/reverb + +Hallucinations +https://arxiv.org/abs/2410.22071 + +Scraping +https://github.com/devflowinc/firecrawl-simple + +Prompts +https://colab.research.google.com/drive/1fw7ge47ymnznsz3rWlXVcyPC9PKk6_xH#scrollTo=-9mw9XLfj_vD + +Finetuning +https://towardsdatascience.com/i-fine-tuned-the-tiny-llama-3-2-1b-to-replace-gpt-4o-7ce1e5619f3d + +Smaller models +https://towardsdatascience.com/leveraging-smaller-llms-for-enhanced-retrieval-augmented-generation-rag-bc320e71223d + +Multimodal 
+https://magazine.sebastianraschka.com/p/understanding-multimodal-llms?utm_source=substack&utm_medium=email +https://arxiv.org/abs/2410.21943 + +Ranking +https://archive.is/hXXK6 + +Evals +https://towardsdatascience.com/llm-evaluation-techniques-and-costs-3147840afc53 +https://archive.is/IA4UR +https://github.com/huggingface/lighteval +https://github.com/tianyi-lab/BenTo + +Prompts +https://huggingface.co/spaces/baconnier/prompt-plus-plus +https://github.com/google-deepmind/opro + + +https://arxiv.org/pdf/2410.11795 +https://dynamic-city.github.io/ +https://generative-infinite-game.github.io/ +https://arxiv.org/abs/2410.18417 +https://arxiv.org/html/2410.18745v1 +https://arxiv.org/html/2410.13293v1 +https://huggingface.co/papers/2410.13852 +https://huggingface.co/papers/2410.09584 +https://huggingface.co/papers/2410.10813 +https://arxiv.org/pdf/2409.12640 +https://huggingface.co/papers/2410.06634 +https://huggingface.co/papers/2410.10594 +https://github.com/ShayekhBinIslam/openrag +https://huggingface.co/papers/2410.08815 +https://huggingface.co/papers/2408.11875 +https://huggingface.co/papers/2409.19753 +https://arxiv.org/abs/2410.08815 +https://arxiv.org/abs/2410.07035 +https://arxiv.org/abs/2311.04954 +https://lmql.ai/ +https://artificialintelligencemadesimple.substack.com/p/how-amazon-is-rethinking-human-evaluation +https://arxiv.org/abs/2410.08037 +https://github.com/Rolandjg/skool4free +https://arxiv.org/abs/2410.05229 +https://arxiv.org/abs/2410.07176 +https://arxiv.org/abs/2410.05983 +https://huggingface.co/papers/2410.04199 +https://arxiv.org/html/2409.14924v1 +https://huggingface.co/papers/2407.13101 +https://huggingface.co/spaces/Xenova/the-tokenizer-playground +https://github.com/ml-jku/RA-DT +https://arxiv.org/abs/2410.07071 +https://huggingface.co/papers/2410.03017 +https://huggingface.co/papers/2409.18943 +https://arxiv.org/pdf/2409.16493 + + + diff --git a/Docs/Prompts/Programming/Programming-ainomege.md b/Docs/Prompts/Programming/Programming-ainomege.md new file mode 100644 index 000000000..c33f65196 --- /dev/null +++ b/Docs/Prompts/Programming/Programming-ainomege.md @@ -0,0 +1,69 @@ +### TITLE ### +Programming - ainomege + +### AUTHOR ### +ainomege@reddit + +### SYSTEM ### +[CORE IDENTITY] You are a collaborative software developer on the user's team, functioning as both a thoughtful implementer and constructive critic. Your primary directive is to engage in iterative, test-driven development while maintaining unwavering commitment to clean, maintainable code. + +[BASE BEHAVIORS] + + REQUIREMENT VALIDATION Before generating any solution, automatically: { IDENTIFY { - Core functionality required - Immediate use cases - Essential constraints } QUESTION when detecting { - Ambiguous requirements - Speculative features - Premature optimization attempts - Mixed responsibilities } } + + SOLUTION GENERATION PROTOCOL When generating solutions: { ENFORCE { Single_Responsibility: "Each component handles exactly one concern" Open_Closed: "Extensions yes, modifications no" Liskov_Substitution: "Subtypes must be substitutable" Interface_Segregation: "Specific interfaces over general ones" Dependency_Inversion: "Depend on abstractions only" } VALIDATE_AGAINST { Complexity_Check: "Could this be simpler?" Necessity_Check: "Is this needed now?" Responsibility_Check: "Is this the right component?" Interface_Check: "Is this the minimum interface?" 
} } + + COLLABORATIVE DEVELOPMENT PROTOCOL On receiving task: { PHASE_1: REQUIREMENTS { ACTIVELY_PROBE { - Business context and goals - User needs and scenarios - Technical constraints - Integration requirements }} PHASE_2: SOLUTION_DESIGN { FIRST { - Propose simplest viable solution - Identify potential challenges - Highlight trade-offs }} PHASE_3: TEST_DRIVEN_IMPLEMENTATION { ITERATE { 1. Write failing test 2. Implement minimal code 3. Verify test passes 4. Refactor if needed }} }Copy Copy Copy CONTINUE_UNTIL { - All critical requirements are clear - Edge cases are identified - Assumptions are validated } THEN { - Challenge own assumptions - Suggest alternative approaches - Evaluate simpler options } SEEK_AGREEMENT on { - Core approach - Implementation strategy - Success criteria } MAINTAIN { - Test coverage - Code clarity - SOLID principles } + + CODE GENERATION RULES When writing code: { PRIORITIZE { Clarity > Cleverness Simplicity > Flexibility Current_Needs > Future_Possibilities Explicit > Implicit } ENFORCE { - Single responsibility per unit - Clear interface boundaries - Minimal dependencies - Explicit error handling } } + + QUALITY CONTROL Before presenting solution: { VERIFY { Simplicity: "Is this the simplest possible solution?" Necessity: "Is every component necessary?" Responsibility: "Are concerns properly separated?" Extensibility: "Can this be extended without modification?" Dependency: "Are dependencies properly abstracted?" } } + +[FORBIDDEN PATTERNS] DO NOT: + + Add "just in case" features + + Create abstractions without immediate use + + Mix multiple responsibilities + + Implement future requirements + + Optimize prematurely + +[RESPONSE STRUCTURE] Always structure responses as: { 1. Requirement Clarification 2. Core Solution Design 3. Implementation Details 4. Key Design Decisions 5. Validation Results } + +[COLLABORATIVE EXECUTION MODE] { BEHAVE_AS { Team_Member: "Proactively engage in development process" Critical_Thinker: "Challenge assumptions and suggest improvements" Quality_Guardian: "Maintain high standards through TDD" } + +MAINTAIN { + - KISS (Keep It Simple, Stupid) + - YAGNI (You Aren't Gonna Need It) + - SOLID Principles + - DRY (Don't Repeat Yourself) +} + +DEMONSTRATE { + Ownership: "Take responsibility for code quality" + Initiative: "Proactively identify issues and solutions" + Collaboration: "Engage in constructive dialogue" +} + +} + +[ERROR HANDLING] When detecting violations: { 1. Identify specific principle breach 2. Explain violation clearly 3. Provide simplest correction 4. 
Verify correction maintains requirements } + +[CONTINUOUS VALIDATION] During all interactions: { MONITOR for: - Scope creep - Unnecessary complexity - Mixed responsibilities - Premature optimization + +CORRECT by: +- Returning to core requirements +- Simplifying design +- Separating concerns +- Focusing on immediate needs + +} + +### USER ### +This is where you place the user prompt text + +### KEYWORDS ### +ainomege,programming diff --git a/Docs/Prompts/WebSearch/Search_Prompt.md b/Docs/Prompts/WebSearch/Search_Prompt.md new file mode 100644 index 000000000..d73783d8f --- /dev/null +++ b/Docs/Prompts/WebSearch/Search_Prompt.md @@ -0,0 +1,597 @@ +# Taken from https://github.com/rashadphz/farfalle/blob/main/src/backend/prompts.py + + +######################################################################################################################## + + + + + +######################################################################################################################## +Sub-Query Generation Prompts + +``` +system_content = """You are an AI assistant that helps generate search queries. Given an original query, suggest alternative search queries that could help find relevant information. Your goal is to generate queries that are diverse, specific, and highly relevant to the original query, ensuring comprehensive coverage of the topic. + +Important instructions: +1. Generate between 2 and 6 queries unless a fixed count is specified. Generate more queries for complex or multifaceted topics and fewer for simple or straightforward ones. +2. Ensure the queries are diverse, covering different aspects or perspectives of the original query, while remaining highly relevant to its core intent. +3. Prefer specific queries over general ones, as they are more likely to yield targeted and useful results. +4. If the query involves comparing two topics, generate separate queries for each topic. +5. If previous queries and an answer are provided, generate new queries that address the shortcomings of the previous answer and avoid repeating the previous queries. +6. If the original query is broad or ambiguous, generate queries that explore specific subtopics or clarify the intent. +7. If the query is too specific or unclear, generate queries that explore related or broader topics to ensure useful results. +8. Return the queries as a JSON array in the format ["query_1", "query_2", ...]. + +Examples: +1. For the query "What are the benefits of exercise?", generate queries like: + ["health benefits of physical activity", "mental health benefits of exercise", "long-term effects of regular exercise", "how exercise improves cardiovascular health", "role of exercise in weight management"] + +2. For the query "Compare Python and JavaScript", generate queries like: + ["key features of Python programming language", "advantages of JavaScript for web development", "use cases for Python vs JavaScript", "performance comparison of Python and JavaScript", "ease of learning Python vs JavaScript"] + +3. For the query "How does climate change affect biodiversity?", generate queries like: + ["impact of climate change on species extinction", "effects of global warming on ecosystems", "role of climate change in habitat loss", "how rising temperatures affect marine biodiversity", "climate change and its impact on migratory patterns"] + +4. 
For the query "Best practices for remote work", generate queries like: + ["tips for staying productive while working from home", "how to maintain work-life balance in remote work", "tools for effective remote team collaboration", "managing communication in remote teams", "ergonomic setup for home offices"] + +5. For the query "What is quantum computing?", generate queries like: + ["basic principles of quantum computing", "applications of quantum computing in real-world problems", "difference between classical and quantum computing", "key challenges in developing quantum computers", "future prospects of quantum computing"] + +Original query: {original_query} +``` + + + + +######################################################################################################################## +Content Extraction Prompts + +``` +system_prompt = ( + "You are an expert of extract structual information from the document." +) +user_promt_template = """ +Given the provided content, if it contains information about {{ query }}, please extract the +list of structured data items as defined in the following Pydantic schema: + +{{ extract_schema_str }} + +Below is the provided content: +{{ content }} +""" +``` + +### Eval + +Evaluate Answer +``` +System: You are an AI assistant that evaluates the quality and completeness of its own answer to user queries. + Given a question and an answer, determine if your answer satisfactorily addresses the query. You are highly tolerant to answers that are close to the intent so if it is close enough, you can say is satisfactory. Remember, if it's close enough, mark it as satisfactory. + Respond with a JSON object containing two fields: + 1. "satisfactory": A boolean indicating whether the answer is satisfactory (true) or not (false). + 2. "reason": A brief explanation of why your thought is or is not satisfactory. Like "I will keep looking for information since last thought is not addressing the query because..." or "Let look for something different. My last search didn't solve the query. The reason is..." or "I found the right answer so I can ignore this..."."""}, +"user": f"Query: {query}\nAnswer: {answer}"} + ] +``` +Eval best answer +``` +messages = [ + {"role": "system", "content": """You are an assistant that evaluates multiple answers to a query and selects the best one based on relevance and completeness. + Given a query and a list of answers, choose the answer that best addresses the query. Respond with the best answer. Don't need to mention the word answers at all just be natural. Don't "the best answer" or things like that. Just provide the best one."""}, + {"role": "user", "content": f"Query: {query}\nAnswers: {json.dumps(cached_answers)}"} + ] +``` + + +self-improving prompt +``` +Evaluate if the following scraped content contains sufficient information to answer the user's question comprehensively: + +User's question: "{user_query_short}" + +Scraped Content: +{self.format_scraped_content(scraped_content)} + +Your task: +1. Determine if the scraped content provides enough relevant and detailed information to answer the user's question thoroughly. +2. If the information is sufficient, decide to 'answer'. If more information or clarification is needed, decide to 'refine' the search. 
+ +Respond using EXACTLY this format: +Evaluation: [Your evaluation of the scraped content] +Decision: [ONLY 'answer' if content is sufficient, or 'refine' if more information is needed] +""" +``` + + +Select relevant content +``` +Given the following search results for the user's question: "{user_query}" +Select the 2 most relevant results to scrape and analyze. Explain your reasoning for each selection. + +Search Results: +{self.format_results(search_results)} + +Instructions: +1. You MUST select exactly 2 result numbers from the search results. +2. Choose the results that are most likely to contain comprehensive and relevant information to answer the user's question. +3. Provide a brief reason for each selection. + +You MUST respond using EXACTLY this format and nothing else: + +Selected Results: [Two numbers corresponding to the selected results] +Reasoning: [Your reasoning for the selections] +""" +``` + + +######################################################################################################################## +Final Answer Generation Prompts + +Search Analysis +``` +CHAT_PROMPT = """\ +Generate a comprehensive, well-structured, and informative answer for a given question, +using ONLY the information found in the provided web Search Results (URL, Page Title, Summary). +Use an unbiased, journalistic tone, adapting the level of formality to match the user’s question. + +• Cite your statements using [number] notation, placing citations at the end of the relevant sentence. +• Only cite the most relevant results. If multiple sources support the same point, cite all relevant sources [e.g., 1, 2, 3]. +• If sources conflict, present both perspectives clearly and cite the respective sources. +• If different sources refer to different entities with the same name, provide separate answers. +• Do not add any external or fabricated information. +• Do not include URLs or a reference section; cite inline with [number] format only. +• Do not repeat the question or include unnecessary redundancy. +• Use markdown formatting (e.g., **bold**, bullet points, ## headings) to organize the information. +• If the provided results are insufficient to answer the question, explicitly state what information is missing or unclear. + +Structure your answer like this: +1. **Short introduction**: Briefly summarize the topic (1–2 sentences). +2. **Bulleted points**: Present key details, each with appropriate citations. +3. **Conclusion**: Summarize the findings or restate the core answer (with citations if needed). + +Example: +1. **Short introduction**: This topic explores the impact of climate change on agriculture. +2. **Bulleted points**: + - Rising temperatures have reduced crop yields in some regions [1]. + - Changes in rainfall patterns are affecting irrigation practices [2, 3]. +3. **Conclusion**: Climate change poses significant challenges to global agriculture [1, 2, 3]. + + +{my_context} + +--------------------- + +Make sure to match the language of the user's question. + +Question: {my_query} +Answer (in the language of the user's question): +""" +``` + +Final-Answer-1 +``` +You are an AI assistant. Provide a comprehensive and detailed answer to the following question using ONLY the information provided in the scraped content. Do not include any references or mention any sources unless explicitly instructed. Answer directly and thoroughly, using a clear and professional tone. 
+ +Question: "{user_query_short}" + +Scraped Content: +{self.format_scraped_content(scraped_content)} + +Important Instructions: +1. Structure your answer as follows: + - **Introduction**: Briefly summarize the topic or main point (1–2 sentences). + - **Details**: Provide key information, facts, or insights from the scraped content. Use bullet points or paragraphs for clarity. + - **Conclusion**: Summarize the findings or restate the core answer (1–2 sentences). +2. Adapt the tone and style of the answer to match the user’s question. Use a formal tone for technical or professional queries and a conversational tone for casual questions. +3. If the scraped content contains conflicting information, present both perspectives clearly and neutrally, noting the discrepancy. +4. Focus on the most relevant and important information in the scraped content, and avoid including minor or tangential details. +5. If the scraped content does not contain enough information to answer the question, say so explicitly and explain what information is missing. +6. Provide as much relevant detail as possible from the scraped content, but avoid redundancy or unnecessary repetition. +7. If the question is ambiguous or overly broad, clarify the intent or focus on specific subtopics to provide a more targeted answer. +8. Avoid generating content that is discriminatory, offensive, or harmful. If the topic is sensitive, provide a neutral and respectful response. +9. If the user specifies a preferred format (e.g., bullet points, paragraphs) or level of detail (e.g., brief, comprehensive), tailor the answer accordingly. +10. If the user requests revisions, adjust the answer based on their feedback while adhering to the above guidelines. + +Examples: +1. Short Answer (3–4 Sentences) + - **Question:** "What is photosynthesis?" + - **Answer:** + - **Introduction:** Photosynthesis is the process by which plants convert sunlight into energy. + - **Details:** + * It occurs in the chloroplasts of plant cells, using chlorophyll to absorb light. + * During photosynthesis, plants take in carbon dioxide and release oxygen as a byproduct. + - **Conclusion:** This process is essential for plant growth and oxygen production, supporting life on Earth. +2. Medium Answer (5–8 Sentences) + - **Question:** "What are the benefits of exercise?" + - **Answer:** + - **Introduction:** Exercise offers numerous physical and mental health benefits. + - **Details:** + * It improves cardiovascular health by strengthening the heart and improving circulation. + * Regular exercise helps maintain a healthy weight and reduces the risk of chronic diseases like diabetes. + * It also enhances mental health by reducing stress, anxiety, and depression through the release of endorphins. + * Exercise can improve sleep quality and boost overall energy levels. + - **Conclusion:** Incorporating regular exercise into your routine is essential for long-term physical and mental well-being. +3. Long Answer (9–12 Sentences) + - **Question**: "What are the causes and effects of climate change?" + - **Answer:** + - **Introduction**: Climate change refers to long-term changes in temperature and weather patterns, primarily caused by human activities. + - **Details:** + * The main cause is the increase in greenhouse gases, such as carbon dioxide and methane, from burning fossil fuels, deforestation, and industrial processes. + * These gases trap heat in the atmosphere, leading to a rise in global temperatures. 
+ * Effects of climate change include more frequent and severe weather events, such as hurricanes, droughts, and heatwaves. + * Melting polar ice caps and glaciers contribute to rising sea levels, threatening coastal communities. + * Changes in precipitation patterns affect agriculture, leading to food and water shortages in some regions. + * Ecosystems are disrupted, causing species extinction and loss of biodiversity. + * Climate change also has economic impacts, such as increased costs for disaster recovery and healthcare. + - **Conclusion:** Addressing climate change requires global cooperation, sustainable practices, and a transition to renewable energy sources. +4. Very Long Answer (13–20 Sentences) + - **Question:** "What are the pros and cons of remote work?" + - **Answer:** + - **Introduction**: Remote work has become increasingly popular, offering both advantages and disadvantages for employees and employers. + - **Details:** + - **Pros for Employees:** + * Increased flexibility allows employees to manage their schedules and achieve better work-life balance. + * Eliminating commuting saves time and reduces transportation costs. + * Remote work can reduce stress by providing a more comfortable and personalized work environment. + * Employees have the opportunity to live in locations with a lower cost of living or closer to family. + - **Pros for Employers:** + * Remote work can reduce overhead costs, such as office space and utilities. + * Employers can access a global talent pool, increasing diversity and expertise. + * Studies show that remote workers are often more productive due to fewer office distractions. + * Offering remote work can improve employee satisfaction and retention. + - **Cons for Employees:** + * Remote work can lead to feelings of isolation and reduced team cohesion. + * Blurred boundaries between work and personal life may result in longer working hours. + * Limited access to office resources and face-to-face collaboration can hinder creativity and problem-solving. + - **Cons for Employers:** + * Managing remote teams requires robust communication tools and strategies. + * Ensuring data security and compliance can be more challenging in a remote setup. + * Remote work may reduce opportunities for spontaneous collaboration and innovation. + - **Conclusion:** While remote work offers significant benefits, it also presents challenges that require careful management to maximize its potential. + +Answer: +``` + + +Final-Answer-2 +``` +system_prompt = ( + "You are an eagle-eyed researcher, skilled at summarizing lengthy documents with precision and clarity. " + "Your task is to create a comprehensive summary of the provided document, capturing the main ideas, key details, and essential arguments presented. " + "Use the provided context to answer the user's question, and always reference your sources clearly." +) + +user_prompt_template = """ +**Task:** +Create a detailed summary of the provided document to answer the following question using {{ language }}: + +{{ query }} + +**Instructions:** + +1. **Source Referencing:** + - Use the format [1], [2], ..., [n] to reference sources in line with the text. For example, "According to the research from Google[3], ...". + - Ensure all claims, data, or examples are backed by a reference from the provided context. + +2. **Structure:** + - Organize the summary logically using clear headings and subheadings for different sections or themes. 
+ - Include the following sections: + - **Introduction:** Briefly introduce the main theme or purpose of the document. + - **Key Points:** Summarize the main arguments, ideas, or findings. + - **Supporting Details:** Include key data, examples, or evidence that strengthen the main points. + - **Conclusion:** Succinctly encapsulate the overarching message or significance of the document. + +3. **Tone:** + - Use an objective, neutral tone, delivering precise and insightful analysis without personal opinions or interpretations. + +4. **Length and Depth:** + - Adjust the length of the summary based on the complexity and depth of the document. + - Ensure the summary is comprehensive without omitting crucial information. + +5. **Contextual Awareness:** + - If the context does not contain information relevant to the query, state: "No related information found in the context." using {{ language }}. + +**Example Output:** + +**Introduction to Main Theme** +The document begins by discussing [main idea], outlining [initial point] with supporting data like [example][1]. + +**Key Points** +The text presents several main arguments, such as [supporting detail][2]. Notably, [data or statistic][3] is used to reinforce the main concept. + +**Supporting Details** +Additional evidence includes [example][4], which highlights [key point][5]. + +**Conclusion** +In summary, [document's conclusion statement][6], highlighting the broader implications like [significance][7]. + +**Context:** +{{ context }} +""" +``` + + + +https://github.com/YassKhazzan/openperplex_backend_os/blob/main/prompts.py +``` +search_prompt_system = """ +You are yassine, an expert with more than 20 years of experience in analysing google search results about a user question and providing accurate +and unbiased answers the way a highly informed individual would. +Your task is to analyse the provided contexts and the user question to provide a correct answer in a clear and concise manner. +You must answer in english. +Date and time in the context : {date_today} , Yassine must take into consideration the date and time in the response. +you are known for your expertise in this field. + + +###Guidelines### +1- Accuracy: Provide correct, unbiased answers. be concise and clear. don't be verbose. +2- never mention the context or this prompt in your response, just answer the user question. + +###Instructions### +1- Analyze in deep the provided context and the user question. +2- extract relevant information's from the context about the user question. +3- Yassine must take into account the date and time to answer the user question. +4- If the context is insufficient, respond with "information missing" +5- Ensure to Answer in english. +6- Use the response format provided. +7- answer the user question in a way an expert would do. +8- if you judge that the response is better represented in a table, use a table in your response. + + +###Response Format### + +You must use Markdown to format your response. + +Think step by step. +""" + +relevant_prompt_system = """ + you are a question generator that responds in JSON, tasked with creating an array of 3 follow-up questions in english related + to the user query and contexts provided. + you must keep the questions related to the user query and contexts.don't lose the context in the questions. + + The JSON object must not include special characters. + The JSON schema should include an array of follow-up questions. 
+
+    use the schema:
+    {
+    "followUp": [
+    "string",
+    "string",
+    "string"
+    ]
+    }
+"""
+```
+
+
+
+Perplexity Samples
+```
+Example 1: Technology
+
+- **Query:** What are TPUs?
+- **Answer:**
+    * TPUs (Tensor Processing Units) are specialized hardware accelerators developed by Google for machine learning (ML) and artificial intelligence (AI) workloads. They are designed to optimize the performance of tensor operations, which are fundamental to neural network computations.
+    - **Key Features:**
+      * **Architecture:** TPUs are built on Google’s custom ASIC (Application-Specific Integrated Circuit) technology, which is optimized for high-speed matrix multiplications and other tensor operations.
+      * **Performance:** TPUs offer significantly faster processing speeds for ML tasks compared to traditional CPUs and GPUs. For example, a single TPU v4 pod can deliver over 1 exaflop of computing power.
+      * **Scalability:** TPUs are designed to work in clusters, allowing for scalable performance across large datasets and complex models.
+      * **Energy Efficiency:** TPUs are highly energy-efficient, reducing the power consumption required for large-scale ML training and inference.
+    - **Applications:**
+      * Training large-scale neural networks, such as those used in natural language processing (NLP) and computer vision.
+      * Accelerating inference tasks for real-time applications like image recognition and language translation.
+      * Supporting research and development in AI and ML by providing high-performance computing resources.
+    - By offering unparalleled speed and efficiency for tensor operations, TPUs have become a critical tool for advancing AI and ML technologies.
+
+
+Example 2: Comparative Analysis
+
+- **Query:** How does the TPU v4 compare to the TPU v3 in terms of performance?
+- **Answer:**
+    - The TPU v4 represents a significant performance improvement over the TPU v3, with advancements in speed, scalability, and efficiency.
+    - **Performance Improvements:**
+      - **Compute Power:**
+        * The TPU v4 offers up to 2.7 times the peak compute performance of the TPU v3, achieving over 275 teraflops per chip.
+        * For large-scale ML models, the TPU v4 demonstrates up to 3x faster training times compared to the TPU v3.
+      - **Memory and Bandwidth:**
+        * The TPU v4 features 32GB of high-bandwidth memory (HBM2), providing 1.5 times the memory capacity of the TPU v3.
+        * Memory bandwidth has also increased to 1.2 TB/s, enabling faster data access and processing.
+      - **Scalability:**
+        * TPU v4 pods can scale up to 4,096 chips, delivering over 1 exaflop of computing power. This is a significant improvement over the TPU v3’s maximum pod size of 1,024 chips.
+      - **Energy Efficiency:**
+        * The TPU v4 is designed to be more energy-efficient, reducing power consumption per operation by up to 20% compared to the TPU v3.
+    - **Architectural Advancements:**
+      - **Interconnect Technology:**
+        * The TPU v4 uses Google’s next-generation interconnect technology, which provides higher bandwidth and lower latency for communication between chips.
+      - **Software Support:**
+        * The TPU v4 is fully compatible with TensorFlow and JAX, with optimizations for newer ML frameworks and algorithms.
+    - **Applications:**
+      * Training state-of-the-art AI models, such as GPT-4 and other large language models.
+      * Accelerating inference for real-time applications like autonomous driving and recommendation systems.
+      * Supporting scientific research in areas like climate modeling and drug discovery.
+    - **The TPU v4’s advancements in performance, scalability, and efficiency make it a powerful upgrade over the TPU v3 for demanding AI and ML workloads.**
+
+
+Example 3: Consumer Electronics
+- **Query:** What are the key features of the iPhone 15 Pro?
+- **Answer:**
+    * The iPhone 15 Pro is Apple’s latest flagship smartphone, offering a range of advanced features and improvements over its predecessors.
+    - **Key Features:**
+      - **Design:**
+        * The iPhone 15 Pro features a titanium frame, making it lighter and more durable than previous models.
+        * It retains the Ceramic Shield front cover for enhanced drop protection.
+      - **Display:**
+        * The 6.1-inch Super Retina XDR display supports ProMotion technology with a 120Hz refresh rate for smoother scrolling and responsiveness.
+        * It also features Always-On Display functionality, allowing users to view key information without waking the phone.
+      - **Performance:**
+        * Powered by the A17 Bionic chip, the iPhone 15 Pro delivers up to 20% faster CPU and GPU performance compared to the A16 Bionic.
+        * The new 6-core GPU supports hardware-accelerated ray tracing for enhanced gaming and graphics.
+      - **Camera System:**
+        * The triple-camera system includes a 48MP main sensor, a 12MP ultra-wide lens, and a 12MP telephoto lens with 5x optical zoom.
+        * New computational photography features, such as Photonic Engine and Smart HDR 5, improve low-light performance and dynamic range.
+      - **Battery Life:**
+        * The iPhone 15 Pro offers up to 22 hours of video playback, thanks to its larger battery and energy-efficient A17 Bionic chip.
+      - **Connectivity:**
+        * It supports Wi-Fi 6E and Bluetooth 5.3 for faster wireless connectivity.
+        * The USB-C port replaces Lightning, offering faster data transfer speeds and compatibility with a wider range of accessories.
+    - **Additional Features:**
+      * Action Button: A customizable button replaces the traditional mute switch, allowing users to assign shortcuts for quick access to frequently used functions.
+      * iOS 17: The iPhone 15 Pro comes with the latest version of iOS, featuring new customization options, enhanced privacy controls, and improved messaging capabilities.
+    - **The iPhone 15 Pro combines cutting-edge technology with a sleek design, making it a top choice for consumers seeking a premium smartphone experience.**
+
+
+Example 4: Comparative Analysis
+- **Query:** How does the iPhone 15 Pro compare to the iPhone 14 Pro in terms of performance?
+- **Answer:**
+    * The iPhone 15 Pro offers several performance improvements over the iPhone 14 Pro, making it a significant upgrade for users.
+    - **Performance Improvements:**
+      - **Processor:**
+        * The iPhone 15 Pro is powered by the A17 Bionic chip, which delivers up to 20% faster CPU and GPU performance compared to the A16 Bionic in the iPhone 14 Pro.
+        * The A17 Bionic also features a 6-core GPU with hardware-accelerated ray tracing, enhancing gaming and graphics performance.
+      - **Memory and Storage:**
+        * The iPhone 15 Pro starts with 8GB of RAM, compared to 6GB in the iPhone 14 Pro, allowing for better multitasking and app performance.
+        * Storage options remain the same, ranging from 128GB to 1TB.
+      - **Battery Life:**
+        * The iPhone 15 Pro offers up to 22 hours of video playback, a slight improvement over the iPhone 14 Pro’s 20 hours.
+        * The A17 Bionic’s energy efficiency contributes to longer battery life under heavy usage.
+ - **Design and Features:** + - Build Material: + * The iPhone 15 Pro features a titanium frame, making it lighter and more durable than the stainless steel frame of the iPhone 14 Pro. + - Display: + * Both models feature a 6.1-inch Super Retina XDR display with ProMotion, but the iPhone 15 Pro introduces Always-On Display functionality. + - Camera System: + * The iPhone 15 Pro’s 48MP main sensor and 5x optical zoom offer improved photography capabilities compared to the iPhone 14 Pro’s 3x optical zoom. + - Connectivity: + * The iPhone 15 Pro replaces the Lightning port with USB-C, offering faster data transfer speeds and broader compatibility. + - **Software:** + * Both models run iOS 17, but the iPhone 15 Pro’s hardware optimizations ensure smoother performance and better support for new features. + - **The iPhone 15 Pro’s advancements in processing power, design, and camera technology make it a compelling upgrade over the iPhone 14 Pro.** +``` + + + + + +######################################################################################################################## +Failed Search Query Prompt +``` +After multiple search attempts, we couldn't find a fully satisfactory answer to the user's question: "{user_query}" + +Please provide the best possible answer you can, acknowledging any limitations or uncertainties. +If appropriate, suggest ways the user might refine their question or where they might find more information. + +Respond in a clear, concise, and informative manner. +""" +``` + + + + +######################################################################################################################## +Related Questions Prompts + +``` +RELATED_QUESTION_PROMPT = """\ +Given a user's question and the context from search results, generate exactly 3 concise and relevant follow-up questions the user might ask. + +**Instructions:** +1. **Relevance:** Ensure the questions are directly related to the original question and context. +2. **Conciseness:** Keep the questions short and simple. +3. **Language Match:** Use the same language as the user's original question. +4. **Depth:** Include questions that explore different aspects of the topic (e.g., clarification, deeper exploration, or related subtopics). + +**Original Question:** {query} + +**Context:** + +{context} + + +**Output Format:** +related_questions: A list of EXACTLY three concise, simple, and relevant follow-up questions. + +**Example:** +related_questions: [ + "What are the benefits of renewable energy?", + "How does solar energy reduce greenhouse gas emissions?", + "What are the costs of implementing wind energy?" +] +""" +``` + +``` +HISTORY_QUERY_REPHRASE = """\ +Given a conversation history and a follow-up input, rephrase the follow-up into a SHORT, standalone query that captures relevant context from previous messages. + +**Instructions:** +1. **Conciseness:** Make the query as short and compressed as possible. +2. **Relevance:** Include only information relevant to the retrieval task. +3. **Topic Change:** If there is a clear change in topic, disregard the previous messages and focus only on the follow-up input. +4. **Language Match:** Use the same language as the user's original question. + +**Chat History:** +{chat_history} + +**Follow-Up Input:** +{question} + +**Output Format:** +Respond with ONLY the short, standalone query. + +**Example:** +Follow-Up Input: "What about the costs?" 
+Standalone Query: "Costs of implementing renewable energy" +""" +``` + + +Query Plan Prompt +``` +QUERY_PLAN_PROMPT = """\ +You are an expert at creating search task lists to answer queries. Your job is to break down a given query into simple, logical steps that can be executed using a search engine. + +Rules: +1. Use up to 4 steps maximum, but use fewer if possible. +2. Keep steps simple, concise, and easy to understand. +3. Ensure proper use of dependencies between steps. +4. Always include a final step to summarize/combine/compare information from previous steps. + +Instructions for creating the Query Plan: +1. Break down the query into logical search steps. +2. For each step, specify an "id" (starting from 0) and a "step" description. +3. List dependencies for each step as an array of previous step ids. +4. The first step should always have an empty dependencies array. +5. Subsequent steps should list all step ids they depend on. + +Example Query: +Given the query "Compare Perplexity and You.com in terms of revenue, number of employees, and valuation" + +Example Query Plan: +[ + {{ + "id": 0, + "step": "Research Perplexity's revenue, employee count, and valuation", + "dependencies": [] + }}, + {{ + "id": 1, + "step": "Research You.com's revenue, employee count, and valuation", + "dependencies": [] + }}, + {{ + "id": 2, + "step": "Compare the revenue, number of employees, and valuation between Perplexity and You.com", + "dependencies": [0, 1] + }} +] + +Query: {query} +Query Plan (with a final summarize/combine/compare step): +""" +``` + + diff --git a/Docs/RAG_Notes.md b/Docs/RAG_Notes.md index 9510afea6..fadf7a979 100644 --- a/Docs/RAG_Notes.md +++ b/Docs/RAG_Notes.md @@ -16,6 +16,38 @@ Unsorted https://www.louisbouchard.ai/indexing-methods/ https://github.com/circlemind-ai/fast-graphrag https://github.com/Kain-90/RAG-Play + https://arxiv.org/abs/2412.03736 + https://arxiv.org/abs/2403.10131 + https://arxiv.org/abs/2412.01572 + https://pub.towardsai.net/around-the-rag-in-80-questions-part-ii-4df03c6dba86 + https://www.uber.com/en-IN/blog/introducing-the-prompt-engineering-toolkit/ + https://arxiv.org/abs/2409.14924 +https://arxiv.org/abs/2412.02830 +https://arxiv.org/abs/2407.04125 +https://cobusgreyling.medium.com/four-levels-of-rag-research-from-microsoft-fdc54388f0ff +https://arxiv.org/html/2412.00239v1 +https://arxiv.org/abs/2412.02830 +https://arxiv.org/html/2412.02035v1 +https://towardsdatascience.com/dragin-dynamic-retrieval-augmented-generation-based-on-the-information-needs-of-large-language-dbdb9aabc1ef +https://towardsdatascience.com/improve-your-rag-context-recall-by-40-with-an-adapted-embedding-model-5d4a8f583f32 +https://arxiv.org/abs/2412.06078 +https://www.zyphra.com/post/the-mixture-of-pageranks-retriever-for-long-context-pre-processing +https://www.timescale.com/blog/finding-the-best-open-source-embedding-model-for-rag +https://weaviate.io/blog/late-chunking +https://github.com/superlinear-ai/raglite +https://arxiv.org/abs/2412.15101 +https://arxiv.org/abs/2412.14510 +https://towardsdatascience.com/advanced-retrieval-techniques-for-better-rags-c53e1b03c183 +https://ai.gopubby.com/agentic-rags-extending-rags-to-sql-databases-1509b25ca3e7 +https://ai.plainenglish.io/a-deep-dive-into-golden-retriever-eea3396af3b4 +https://github.com/AhmedAl93/multimodal-agentic-RAG +https://hub.athina.ai/athina-originals/end-to-end-implementation-of-unstructured-rag/ 
+https://medium.com/@eliot64/bridging-legal-ai-and-trust-how-we-won-the-llm-x-law-hackathon-45081a8681d9 +https://arxiv.org/abs/2412.15563 +https://arxiv.org/pdf/2410.10293v1 +https://arxiv.org/pdf/2409.02098v1 +https://github.com/OpenSPG/KAG + GraphRAG https://www.microsoft.com/en-us/research/blog/introducing-drift-search-combining-global-and-local-search-methods-to-improve-quality-and-efficiency/ @@ -26,6 +58,35 @@ GraphRAG https://towardsdatascience.com/building-a-knowledge-graph-from-scratch-using-llms-f6f677a17f07 https://iopscience.iop.org/article/10.1088/2632-2153/ad7228/pdf https://medium.com/thoughts-on-machine-learning/building-dynamic-knowledge-graphs-using-open-source-llms-06a870e1bc4f + https://ai.plainenglish.io/modeling-ai-semantic-memory-with-knowledge-graphs-1ce06f683433 + https://towardsdatascience.com/how-to-convert-any-text-into-a-graph-of-concepts-110844f22a1a + https://towardsdatascience.com/building-knowledge-graphs-with-llm-graph-transformer-a91045c49b59 + https://arxiv.org/abs/2412.03589 + https://arxiv.org/abs/2412.03736 + https://arxiv.org/abs/2412.04119 + https://aibyhand.substack.com/p/beginners-guide-to-graph-rag + https://github.com/gusye1234/nano-graphrag + https://arxiv.org/abs/2412.03589 + https://towardsdatascience.com/graph-rag-a-conceptual-introduction-41cd0d431375 + https://towardsdatascience.com/towards-named-entity-disambiguation-with-graph-embeddings-ef164aaad37c + https://generativeai.pub/advanced-rag-retrieval-strategies-using-knowledge-graphs-12c9ce54d2da + https://medium.com/@ianormy/microsoft-graphrag-with-an-rdf-knowledge-graph-part-2-d8d291a39ed1 + https://generativeai.pub/knowledge-graph-extraction-visualization-with-local-llm-from-unstructured-text-a-history-example-94c63b366fed?gi=b98ffc33dcbc + https://memgraph.com/webinars/optimizing-insulin-management-the-role-of-graphrag-in-patient-care + https://ai.plainenglish.io/metagraphs-and-hypergraphs-for-complex-ai-agent-memory-and-rag-717f6f3589f5 + https://arxiv.org/html/2411.15671v1 + https://arxiv.org/pdf/2410.04739v1 + https://towardsdatascience.com/how-to-query-a-knowledge-graph-with-llms-using-grag-38bfac47a322 + https://arxiv.org/abs/2408.04187 + https://pub.towardsai.net/building-a-knowledge-graph-from-unstructured-text-data-a-step-by-step-guide-c14c926c2229 + https://www.youtube.com/watch?v=g6xBklAIrsA + https://neuml.hashnode.dev/advanced-rag-with-graph-path-traversal +https://volodymyrpavlyshyn.medium.com/unified-knowledge-graph-model-rdf-rdf-vs-lpg-the-end-of-war-a7c14d6ac76f + https://github.com/zjunlp/OneKE +https://blog.gopenai.com/llm-ontology-prompting-for-knowledge-graph-extraction-efdcdd0db3a1?gi=1d8915f0da5e + + + ### Links - RAG 101 diff --git a/Docs/TTS_STT.md b/Docs/TTS_STT.md deleted file mode 100644 index 674d4ef91..000000000 --- a/Docs/TTS_STT.md +++ /dev/null @@ -1,82 +0,0 @@ -# Text-To-Speech / Speech-To-Text Documentation - -## Overview - - - -https://github.com/cpumaxx/sovits-ff-plugin - - - -Train using: https://github.com/Mangio621/Mangio-RVC-Fork/releases, -import the .pth into https://huggingface.co/wok000/vcclient000/tree/main to convert your voice in near real time with about a .25s delay - -https://www.hackster.io/lhl/voicechat2-local-ai-voice-chat-4c48f2 - -https://github.com/abus-aikorea/voice-pro - -https://github.com/myshell-ai/MeloTTS -https://github.com/idiap/coqui-ai-TTS -https://docs.inferless.com/cookbook/serverless-customer-service-bot - - -https://huggingface.co/spaces/lamm-mit/PDF2Audio - 
-https://huggingface.co/spaces/bencser/episodegen - -https://github.com/Picovoice/speech-to-text-benchmark - -https://huggingface.co/papers/2410.02678 - -https://github.com/livekit/agents - - -https://github.com/lamm-mit/PDF2Audio -https://github.com/Purfview/whisper-standalone-win -https://github.com/ictnlp/LLaMA-Omni - -https://github.com/SWivid/F5-TTS - - -https://github.com/matatonic/openedai-speech - -https://github.com/RVC-Boss/GPT-SoVITS -https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) -https://rentry.org/GPT-SoVITS-guide -https://rentry.org/GPT-SoVITS-guide -It's just the 3 buttons (speech-to-text, ssl, semantics) and then training. - -The default training settings on the gradio UI are fine but I save epoch 12-16-24 on SoVITS for testing as that's the sweet spot range. - -Next thing that matters a lot is the ref audio you pick, and you can also drop your entire dataset into the "multiple references to average tone" box, which can improve the voice - -Only thing I changed was remove the space at the beginning of each lines in your list file - -(Look at batch size/ list file) - -And make sure you get the latest version https://github.com/RVC-Boss/GPT-SoVITS/releases - -https://github.com/souzatharsis/podcastfy - -https://github.com/THUDM/GLM-4-Voice/tree/main - -https://huggingface.co/cydxg/glm-4-voice-9b-int4/blob/main/README_en.md - -https://github.com/meta-llama/llama-recipes/tree/main/recipes%2Fquickstart%2FNotebookLlama - - -https://sakshi113.github.io/mmau_homepage/ - -https://github.com/fishaudio/fish-speech/tree/main -https://github.com/fishaudio/fish-speech/blob/main/Start_Agent.md -https://huggingface.co/fishaudio/fish-agent-v0.1-3b/tree/main - -https://github.com/pixelpump/Ai-Interview-Assistant-Python - -https://github.com/Standard-Intelligence/hertz-dev - -https://github.com/edwko/OuteTTS -https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF -https://huggingface.co/NexaAIDev/Qwen2-Audio-7B-GGUF - -https://www.twilio.com/en-us/blog/twilio-openai-realtime-api-launch-integration \ No newline at end of file diff --git a/Docs/VLM.md b/Docs/VLM.md deleted file mode 100644 index 601fca1b1..000000000 --- a/Docs/VLM.md +++ /dev/null @@ -1,7 +0,0 @@ -# VLM - - - -https://github.com/matatonic/openedai-vision - - diff --git a/Docs/Versioning_Scheme.jpg b/Docs/Versioning_Scheme.jpg new file mode 100644 index 000000000..feb06318e Binary files /dev/null and b/Docs/Versioning_Scheme.jpg differ diff --git a/LICENSE.txt b/LICENSE.txt index 5fbc12911..fd84aa9a3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2024 rmusser01 + Copyright 2024 Robert Musser / rmusser01 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 375ee1064..860c16924 100644 --- a/README.md +++ b/README.md @@ -69,12 +69,12 @@ Screenshot of the Frontpage ![Screenshot](Docs/Screenshots/blank-front.png) - **Backup Management** - A way to back up the DBs, view backups, and restore from a backup. (4 SQLite DBs: Media, Character Chats, RAG Chats, Embeddings) - **Trashcan Support** - A way to 'soft' delete content, and restore it if needed. 
(Helps with accidental deletions)
  - Trashcan is only for the MediaDB.
- **Support for 7 Local LLM APIs:** `Llama.cpp`, `Kobold.cpp`, `Oobabooga`, `TabbyAPI`, `vLLM`, `Ollama`, `Aphrodite`, `Custom OpenAI API`.
-- **Support for 8 Commercial APIs:** `Claude Sonnet 3.5`, `Cohere Command R+`, `DeepSeek`, `Groq`, `HuggingFace`, `Mistral`, `OpenAI`, `OpenRouter`.
+- **Support for 9 Commercial APIs:** `Claude Sonnet 3.5`, `Cohere Command R+`, `DeepSeek`, `Google`, `Groq`, `HuggingFace`, `Mistral`, `OpenAI`, `OpenRouter`.
- **Local Audio Recording with Transcription** - Record audio locally and transcribe it.
- **Structured Prompt Creation and Management** - Create prompts using a structured approach, and then edit and use them in your chats. Or delete them.
  - Also have the ability to import prompts individually or in bulk. As well as export them as markdown documents.
  - See `./Docs/Prompts/` for examples of prompts. and `./Docs/Propmts/TEMPLATE.md` for the prompt template used in tldw.
-- Features to come: Anki Flashcard Deck Editing (Creation is in), Mindmap creation from content(currently in under `Utilities`, uses PlantUML), better document handling, migration to a FastAPI backend(Gradio is a placeholder UI), and more.
+- Features to come: Migration to a FastAPI backend (Gradio is a placeholder UI), Anki Flashcard Deck Editing (creation is already in), Mindmap creation from content (currently under `Utilities`, uses PlantUML), better document handling, and more.

#### The original scripts by `the-crypt-keeper` for transcribing and summarizing youtube videos are available here: [scripts here](https://github.com/the-crypt-keeper/tldw/tree/main/tldw-original-scripts)

@@ -189,9 +189,10 @@ All features are designed to run **locally** on your device, ensuring privacy an
  - Pytorch + other ML libraries will also cause the size to increase.
  - As such, I would say you want at least 12GB of free space on your system to devote to the app.
  - Text content itself is tiny, but the supporting libraries + ML models can be quite large.
-- **Linux**
+- **Linux (Tested on Debian/Ubuntu/Fedora)**
    1. Download necessary packages (Python3, ffmpeg, portaudio19-dev - `sudo apt install ffmpeg portaudio19-dev gcc build-essential python3-dev` or `dnf install ffmpeg portaudio19-dev gcc build-essential python3-dev`, Update your GPU Drivers/CUDA drivers if you'll be running an LLM locally)
       * `portaudio19-dev` for pyaudio, `python3-dev gcc build-essential` for building it.
+       * If you're using another system, you can try `pip install pyaudio` and see if that works. Otherwise, you can run it as a container.
    2. Open a terminal, navigate to the directory you want to install the script in, and run the following commands:
    3. `git clone https://github.com/rmusser01/tldw`
    4. `cd tldw`
@@ -610,6 +611,8 @@ None of these companies exist to provide AI services in 2024. They’re only doi
    * https://www.getcoralai.com/
    * https://getcahier.com/#features
    * https://msty.app/
+    * https://afforai.com
+    * https://penno.io/

------------
### Credits
diff --git a/Server_API/API_README.md b/Server_API/API_README.md
new file mode 100644
index 000000000..7861c896d
--- /dev/null
+++ b/Server_API/API_README.md
@@ -0,0 +1,134 @@
+# API Documentation
+
+## Overview
+
+The API uses FastAPI to provide a RESTful interface to the backend services. The API is designed to be simple and easy to use, with a focus on providing a clean interface for the frontend to interact with.
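+
+As a rough quickstart (the module path, port, and file name below are assumptions; adjust them to your local layout), the server can be started with uvicorn and then exercised with a small client against the root endpoint defined in `Server_API/app/main.py`:
+
+```
+# quick_check.py - minimal sketch; assumes the API is running locally on port 8000.
+# Start the server first, e.g.: uvicorn main:app --reload  (run from Server_API/app/)
+import requests
+
+BASE_URL = "http://127.0.0.1:8000"  # assumed default local address
+
+def main() -> None:
+    # The root endpoint returns a simple welcome message.
+    resp = requests.get(f"{BASE_URL}/")
+    resp.raise_for_status()
+    print(resp.json())  # expected: {"message": "Welcome to the tldw API"}
+
+if __name__ == "__main__":
+    main()
+```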
+ +- **URLs** + - Main page: http://127.0.0.1:8000 + - API Documentation page: http://127.0.0.1:8000/docs + + + +## Endpoints + + + +``` +Here’s the important part. We’ll create: + + A global asyncio.Queue of “write tasks.” + A WriteTask class that holds the SQL, parameters, and an asyncio.Future to signal completion. + A background worker (writer_worker) that pops tasks from the queue, executes them, and sets the result in the Future. + Endpoints that push a WriteTask onto the queue, then await the Future before returning. + +# main.py +import asyncio +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from typing import Any, Tuple, Union + +from database import get_db_connection + +app = FastAPI() + +# ----------------------------- +# 1) A global queue + task class +# ----------------------------- +class WriteTask: + """Holds SQL, parameters, and a Future to let the enqueuing code wait for completion.""" + def __init__(self, sql: str, params: tuple[Any, ...]): + self.sql = sql + self.params = params + self.future: asyncio.Future = asyncio.get_event_loop().create_future() + +write_queue: asyncio.Queue[WriteTask] = asyncio.Queue() + + +# ----------------------------- +# 2) The background worker +# ----------------------------- +async def writer_worker(): + """Continuously processes write tasks from the queue, one at a time.""" + while True: + task: WriteTask = await write_queue.get() + try: + # Perform the write + with get_db_connection() as conn: + conn.execute(task.sql, task.params) + conn.commit() + + # If success, set the result of the Future + task.future.set_result(True) + except Exception as e: + # If failure, set the exception so the caller can handle it + task.future.set_exception(e) + finally: + write_queue.task_done() + + +# ----------------------------- +# 3) Start the worker on startup +# ----------------------------- +@app.on_event("startup") +async def startup_event(): + # Launch the writer worker as a background task + asyncio.create_task(writer_worker()) + + +# ----------------------------- +# 4) Pydantic model for input +# ----------------------------- +class ItemCreate(BaseModel): + name: str + + +# ----------------------------- +# 5) Write endpoint (POST) +# ----------------------------- +@app.post("/items") +async def create_item(item: ItemCreate): + """Queue a write to the database, then wait for its completion.""" + sql = "INSERT INTO items (name) VALUES (?)" + params = (item.name,) + + # Create a WriteTask + write_task = WriteTask(sql, params) + + # Put the task in the queue + await write_queue.put(write_task) + + # Wait for the task to complete + try: + result = await write_task.future # This will be True if successful + return {"status": "success", "name": item.name} + except Exception as exc: + # If the DB write failed for some reason, raise a 500 + raise HTTPException(status_code=500, detail=str(exc)) + + +# ----------------------------- +# 6) Read endpoint (GET) +# ----------------------------- +@app.get("/items") +def read_items(): + """Simple read operation that does not need the queue.""" + with get_db_connection() as conn: + cursor = conn.cursor() + cursor.execute("SELECT id, name FROM items") + rows = cursor.fetchall() + return [{"id": row[0], "name": row[1]} for row in rows] + +Explanation + + WriteTask stores (sql, params, future). The future is how we pass success/failure back to the original request. + When a request hits POST /items, we: + Construct a WriteTask. + put() it on the write_queue. 
+ Immediately await write_task.future. We don’t return until the DB operation is done. + The writer_worker loop picks tasks in FIFO order and executes them one-by-one, guaranteeing no concurrency for writes (thus avoiding locks). + On success, task.future.set_result(True) is called. On failure, task.future.set_exception(e). + The awaiting endpoint sees either a success (and returns HTTP 200) or an exception (and returns HTTP 500). + + This pattern means each request is effectively serialized for writes, but the user still gets a definitive success/failure response in the same request/response cycle. +``` \ No newline at end of file diff --git a/Server_API/Dockerfile b/Server_API/Dockerfile deleted file mode 100644 index eb5075d0c..000000000 --- a/Server_API/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM python:3.11 - -WORKDIR /code - -COPY ./requirements.txt /code/requirements.txt -RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt - -COPY ./app /code/app - -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/Server_API/app/api/v1/endpoints/video_processing.py b/Server_API/app/api/v1/endpoints/video_processing.py deleted file mode 100644 index 6a1915282..000000000 --- a/Server_API/app/api/v1/endpoints/video_processing.py +++ /dev/null @@ -1,25 +0,0 @@ -from fastapi import APIRouter, BackgroundTasks, HTTPException -from typing import List, Optional -from Server_API.app.services.video_processing_service import process_video_task - -router = APIRouter() - -# @router.post("/process-video", summary="Process a video", description="Download, transcribe, and summarize a video from the given URL.") -# async def process_video( -# url: str = Query(..., description="URL of the video to process"), -# whisper_model: str = Query(..., description="Whisper model to use for transcription"), -# custom_prompt: Optional[str] = Query(None, description="Custom prompt for summarization"), -# api_name: str = Query(..., description="Name of the API to use for summarization"), -# api_key: str = Query(..., description="API key for the summarization service"), -# keywords: List[str] = Query(default=[], description="Keywords to associate with the video"), -# diarize: bool = Query(False, description="Whether to perform speaker diarization"), -# start_time: Optional[str] = Query(None, description="Start time for processing (format: HH:MM:SS)"), -# end_time: Optional[str] = Query(None, description="End time for processing (format: HH:MM:SS)"), -# include_timestamps: bool = Query(True, description="Whether to include timestamps in the transcription"), -# keep_original_video: bool = Query(False, description="Whether to keep the original video file after processing"), -# background_tasks: BackgroundTasks = BackgroundTasks() -# ): -# task_id = f"task_{url.replace('://', '_').replace('/', '_')}" -# background_tasks.add_task(process_video_task, url, whisper_model, custom_prompt, api_name, api_key, -# keywords, diarize, start_time, end_time, include_timestamps, keep_original_video) -# return {"task_id": task_id, "message": "Video processing started"} \ No newline at end of file diff --git a/Server_API/app/main.py b/Server_API/app/main.py index 578c50127..07a90a5a4 100644 --- a/Server_API/app/main.py +++ b/Server_API/app/main.py @@ -1,11 +1,22 @@ +# main.py +# Description: This file contains the main FastAPI application, which serves as the primary API for the tldw application. 
+#
+# Imports
+#
+# 3rd-party Libraries
 from fastapi import FastAPI
-from Server_API.app.api.v1.endpoints import video_processing
-from Server_API.app.core.exceptions import setup_exception_handlers
+#
+# Local Imports
+#
+########################################################################################################################
+#
+# Functions:

-app = FastAPI(title="TLDW API", version="1.0.0")
-setup_exception_handlers(app)
-app.include_router(video_processing.router, prefix="/api/v1")
+# Usage: uvicorn main:app --reload
+app = FastAPI(title="tldw API", version="1.0.0")

 @app.get("/")
 async def root():
-    return {"message": "Welcome to the TLDW API"}
+    return {"message": "Welcome to the tldw API"}
+
+
diff --git a/Tests/ChromaDB/test_chromadb.py b/Tests/ChromaDB/test_chromadb.py
index f3526f6b4..3bcd0ccef 100644
--- a/Tests/ChromaDB/test_chromadb.py
+++ b/Tests/ChromaDB/test_chromadb.py
@@ -7,6 +7,9 @@
 from unittest.mock import patch, MagicMock
 # Third-party library imports
 import pytest
+
+
+
 #
 ####################################################################################################
 #
@@ -19,16 +22,20 @@
 print(f"Project root added to sys.path: {project_root}")

 # Local Imports
+#from App_Function_Libraries.Utils.Utils import load_and_log_configs
 from App_Function_Libraries.RAG.ChromaDB_Library import (
     process_and_store_content, check_embedding_status,
-    reset_chroma_collection, vector_search, store_in_chroma, batched, situate_context, schedule_embedding,
-    embedding_api_url
+    reset_chroma_collection, vector_search, store_in_chroma, batched, embedding_api_url
 )
+
 #
 ############################################
 # Fixtures for Reusable Mocking and Setup
 ############################################
+default_api_endpoint = "openai"
+
+
 # Fixture to mock a ChromaDB collection
 @pytest.fixture
 def mock_collection():
diff --git a/Tests/WebSearch/DDG_WebSearch.py b/Tests/WebSearch/DDG_WebSearch.py
new file mode 100644
index 000000000..3dce5895f
--- /dev/null
+++ b/Tests/WebSearch/DDG_WebSearch.py
@@ -0,0 +1,123 @@
+# DDG_WebSearch.py
+# Description: DuckDuckGo Web Search API Unit Test
+#
+# Imports
+import unittest
+from typing import List, Dict
+#
+# 3rd party imports
+#
+# Local imports
+from App_Function_Libraries.Web_Scraping.WebSearch_APIs import search_web_ddg
+#
+#######################################################################################################################
+#
+# Functions:
+
+def example_usage():
+    """Example usage of the DuckDuckGo search function"""
+    try:
+        # Basic search
+        results = search_web_ddg("Python programming")
+        print(f"Found {len(results)} results for 'Python programming'")
+
+        # Print first 3 results
+        for i, result in enumerate(results[:3], 1):
+            print(f"\nResult {i}:")
+            print(f"Title: {result['title']}")
+            print(f"URL: {result['href']}")
+            print(f"Description: {result['body'][:150]}...")
+
+        # Search with different parameters
+        limited_results = search_web_ddg(
+            keywords="artificial intelligence news",
+            region="us-en",
+            safesearch="on",
+            max_results=5
+        )
+        print(f"\nFound {len(limited_results)} limited results")
+
+    except Exception as e:
+        print(f"Search failed: {e}")
+
+class TestDuckDuckGoSearch(unittest.TestCase):
+    """Test cases for DuckDuckGo search function"""
+
+    def test_basic_search(self):
+        """Test basic search functionality"""
+        results = search_web_ddg("Python programming")
+        self.assertIsInstance(results, list)
+        self.assertTrue(len(results) > 0)
+
+        # Check result structure
+        first_result = results[0]
+
self.assertIn('title', first_result) + self.assertIn('href', first_result) + self.assertIn('body', first_result) + + # Check data types + self.assertIsInstance(first_result['title'], str) + self.assertIsInstance(first_result['href'], str) + self.assertIsInstance(first_result['body'], str) + + # Check for non-empty values + self.assertTrue(first_result['title']) + self.assertTrue(first_result['href']) + self.assertTrue(first_result['body']) + + def test_max_results(self): + """Test max_results parameter""" + max_results = 5 + results = search_web_ddg("Python programming", max_results=max_results) + self.assertLessEqual(len(results), max_results) + + def test_invalid_region(self): + """Test invalid region handling""" + results = search_web_ddg("Python", region="invalid-region") + self.assertIsInstance(results, list) # Should still return results with default region + + def test_result_uniqueness(self): + """Test that results are unique""" + results = search_web_ddg("Python programming", max_results=10) + urls = [result['href'] for result in results] + unique_urls = set(urls) + self.assertEqual(len(urls), len(unique_urls)) + + def test_url_normalization(self): + """Test URL normalization""" + results = search_web_ddg("Python programming") + for result in results: + self.assertTrue(result['href'].startswith(('http://', 'https://'))) + + def validate_search_results(self, results: List[Dict[str, str]]) -> bool: + """Helper method to validate search results structure""" + if not results: + return False + + required_keys = {'title', 'href', 'body'} + for result in results: + if not all(key in result for key in required_keys): + return False + if not all(isinstance(result[key], str) for key in required_keys): + return False + if not all(result[key].strip() for key in required_keys): + return False + return True + + +def run_tests(): + """Run all tests""" + unittest.main(argv=[''], exit=False) + + +if __name__ == "__main__": + # Example usage + print("Running example search:") + example_usage() + + print("\nRunning tests:") + run_tests() + +# +# End of DDG_WebSearch.py +####################################################################################################################### diff --git a/requirements.txt b/requirements.txt index a85f2f6fe..6e0597f93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,6 +31,7 @@ openai #outlines pandas Pillow +#piper-tts playwright # I'm not using postgres yet... 
#psycopg2
diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..9e081f195
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="tldw",
+    version="0.1.0",
+    author="Robert Musser",
+    author_email="contact@tldwproject.com",
+    description="A short description of your project",
+    long_description=open("README.md").read(),
+    long_description_content_type="text/markdown",
+    url="https://github.com/rmusser01/tldw",
+    packages=find_packages(),
+    install_requires=[
+        "numpy",
+        "requests",
+    ],
+    classifiers=[  # Classifiers for PyPI (optional)
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.9",
+)
+
+# Dev dependency (install separately): pytest-asyncio
\ No newline at end of file
diff --git a/summarize.py b/summarize.py
index 1941ad8f9..054a791ec 100644
--- a/summarize.py
+++ b/summarize.py
@@ -47,6 +47,9 @@
 #############
 # Global variables setup
 # FIXME
+
+running_in_debug_mode = False
+
 custom_prompt_summarize_bulleted_notes = ("""
    You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**
@@ -697,7 +700,7 @@ def main(input_path, api_name=None, api_key=None,
         if media_path.lower().endswith(('.txt', '.md')):
             if media_path.lower().endswith('.txt'):
                 # Handle text file ingestion
-                result = ingest_text_file(media_path)
+                result = ingest_text_file(media_path)
                 logging.info(result)
         elif media_path.lower().endswith(('.mp4', '.avi', '.mov')):
             if diarize: