From 828665d9c6dde73affcf810fed1e0b545394bf5d Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 9 May 2024 18:12:23 -0700 Subject: [PATCH] Eh --- .gitignore | Bin 6904 -> 6936 bytes README.md | 6 + config.txt | 5 + summarize.py | 196 ++++++++---------- .../Samples}/compare/aoe-english.json | 0 .../Samples}/compare/aoe.yaml | 0 .../Samples}/compare/ufo-13b-english.json | 0 .../Samples}/compare/ufo-13b.yaml | 0 .../Samples}/compare/ufo-english.json | 0 .../Samples}/compare/ufo.yaml | 0 ... Carlson (2023) [zaB_20bkoA4].diarize.json | 0 ...ker Carlson (2023) [zaB_20bkoA4].info.json | 0 ... Carlson (2023) [zaB_20bkoA4].summary.json | 0 ...tory interview [YRvf00NooN8].diarize.json" | 0 ...factory interview [YRvf00NooN8].info.json" | 0 ...tory interview [YRvf00NooN8].summary.json" | 0 ...The Resurgence) [jnoxjLJind4].diarize.json | 0 ...t (The Resurgence) [jnoxjLJind4].info.json | 0 ...n Podcast #185 [4dC_nRYIDZU].diarize.json" | 0 ...dman Podcast #185 [4dC_nRYIDZU].info.json" | 0 ...n Podcast #185 [4dC_nRYIDZU].summary.json" | 0 ...Affairs Hearing [KQ7Dw-739VY].diarize.json | 0 ...gn Affairs Hearing [KQ7Dw-739VY].info.json | 0 ...oboros-l2-13b-gpt4-1.4.1_1691017407.ndjson | 0 ...oboros-l2-13b-gpt4-1.4.1_1691017434.ndjson | 0 ...oboros-l2-13b-gpt4-1.4.1_1691017487.ndjson | 0 ...oboros-l2-13b-gpt4-1.4.1_1691017596.ndjson | 0 ...xt-300_english_airoboros-l2-context.ndjson | 0 ...ll-300_english_airoboros-l2-context.ndjson | 0 ...on-300_english_airoboros-l2-context.ndjson | 0 ...ex-300_english_airoboros-l2-context.ndjson | 0 ...fo-300_english_airoboros-l2-context.ndjson | 0 .../Samples}/prompts/airoboros-l2-context.txt | 0 .../chunker.py | 0 .../compare-app.py | 0 .../compare.py | 0 .../merger.py | 0 .../params/summary.json | 0 .../pyannote.py | 0 .../roller-chatgpt-v2.py | 0 .../roller-chatgpt.py | 0 .../roller-exllama.py | 0 .../roller-vllm.py | 0 43 files changed, 103 insertions(+), 104 deletions(-) rename {Samples => tldw-original-scripts/Samples}/compare/aoe-english.json (100%) rename {Samples => tldw-original-scripts/Samples}/compare/aoe.yaml (100%) rename {Samples => tldw-original-scripts/Samples}/compare/ufo-13b-english.json (100%) rename {Samples => tldw-original-scripts/Samples}/compare/ufo-13b.yaml (100%) rename {Samples => tldw-original-scripts/Samples}/compare/ufo-english.json (100%) rename {Samples => tldw-original-scripts/Samples}/compare/ufo.yaml (100%) rename {Samples => tldw-original-scripts/Samples}/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json (100%) rename {Samples => tldw-original-scripts/Samples}/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json (100%) rename {Samples => tldw-original-scripts/Samples}/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json (100%) rename "Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" => "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" (100%) rename "Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" => "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" (100%) rename "Samples/data/Elon Musk\357\274\232 A future 
worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" => "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" (100%) rename {Samples => tldw-original-scripts/Samples}/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json (100%) rename {Samples => tldw-original-scripts/Samples}/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json (100%) rename "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" => "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" (100%) rename "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" => "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" (100%) rename "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" => "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" (100%) rename {Samples => tldw-original-scripts/Samples}/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json (100%) rename {Samples => tldw-original-scripts/Samples}/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json (100%) rename {Samples => tldw-original-scripts/Samples}/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prepare_aoe-small-300_english_airoboros-l2-context.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prepare_elon-300_english_airoboros-l2-context.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prepare_lex-300_english_airoboros-l2-context.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prepare_ufo-300_english_airoboros-l2-context.ndjson (100%) rename {Samples => tldw-original-scripts/Samples}/prompts/airoboros-l2-context.txt (100%) rename {tldw-scripts => tldw-original-scripts}/chunker.py (100%) rename {tldw-scripts => tldw-original-scripts}/compare-app.py (100%) rename {tldw-scripts => tldw-original-scripts}/compare.py (100%) rename {tldw-scripts => tldw-original-scripts}/merger.py (100%) rename {tldw-scripts => 
tldw-original-scripts}/params/summary.json (100%) rename {tldw-scripts => tldw-original-scripts}/pyannote.py (100%) rename {tldw-scripts => tldw-original-scripts}/roller-chatgpt-v2.py (100%) rename {tldw-scripts => tldw-original-scripts}/roller-chatgpt.py (100%) rename {tldw-scripts => tldw-original-scripts}/roller-exllama.py (100%) rename {tldw-scripts => tldw-original-scripts}/roller-vllm.py (100%) diff --git a/.gitignore b/.gitignore index 15d8ddd6a2d81fb3894d1ac84b36752b0c8fd228..03314cafd1f76d7ce6416fb56c23c131c3004a9f 100644 GIT binary patch delta 41 qcmexiI>U_V|Gy0?Op+lCsSL#oB|xmt5CmqIGUR~xo8_5qN&o;Bz6;|3 delta 13 UcmbPX_QP}|lM2&j7p5x`03@vhx&QzG diff --git a/README.md b/README.md index 28603b9ee..bf30f2811 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ GUI 4. Llama.cpp 5. Kobold.cpp 6. Oobabooga + 7. HuggingFace - **Planned to Support** 1. TabbyAPI @@ -203,6 +204,11 @@ By default videos, transcriptions and summaries are stored in a folder with the * `llama3-8b-8192` * `llama3-70b-8192` * `mixtral-8x7b-32768` + - HuggingFace: + * `CohereForAI/c4ai-command-r-plus` + * `meta-llama/Meta-Llama-3-70B-Instruct` + * `meta-llama/Meta-Llama-3-8B-Instruct` + * In principle any model hosted there can be used; the models above are listed for reference for the free demo instance, in case you'd like to host your own. - OpenAI: * `gpt-4-turbo` * `gpt-4-turbo-preview` diff --git a/config.txt b/config.txt index 303df5c1d..6acbeaa03 100644 --- a/config.txt +++ b/config.txt @@ -7,6 +7,9 @@ groq_api_key = idk groq_model = FIXME openai_api_key = openai_model = gpt-4-turbo +huggingface_api_token = +huggingface_model = CohereForAI/c4ai-command-r-plus + [Local-API] kobold_api_key = @@ -16,9 +19,11 @@ llama_api_IP = http://127.0.0.1:8080/completion ooba_api_key = ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions + [Paths] output_path = Results logging_file = Logs + [Processing] processing_choice = cuda \ No newline at end of file diff --git a/summarize.py b/summarize.py index 52a0a6c7f..16ccc729e 100644 --- a/summarize.py +++ b/summarize.py @@ -70,12 +70,14 @@ cohere_api_key = config.get('API', 'cohere_api_key', fallback=None) groq_api_key = config.get('API', 'groq_api_key', fallback=None) openai_api_key = config.get('API', 'openai_api_key', fallback=None) +huggingface_api_token = config.get('API', 'huggingface_api_token', fallback=None) # Models anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229') cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus') groq_model = config.get('API', 'groq_model', fallback='FIXME') openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo') +huggingface_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus') # Local-Models kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') @@ -329,6 +331,27 @@ def process_local_file(file_path): # Video Download/Handling # +def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False): + try: + results = main(input_path, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, download_video_flag=download_video_flag) + + if results: + transcription_result = results[0] + json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') + with open(json_file_path, 'r') as file: + json_data = json.load(file)
+ + summary = transcription_result.get('summary', '') + + return json_data, summary, json_file_path, json_file_path.replace('.segments.json', '_summary.txt') + else: + return None, "No results found.", None, None + except Exception as e: + error_message = f"An error occurred: {str(e)}" + return None, error_message, None, None + + + def create_download_directory(title): base_dir = "Results" # Remove characters that are illegal in Windows filenames and normalize @@ -1110,24 +1133,43 @@ def save_summary_to_file(summary, file_path): # Gradio UI # -def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False): +# Only to be used when configured with Gradio for HF Space +def summarize_with_huggingface(api_key, file_path): try: - results = main(input_path, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, download_video_flag=download_video_flag) + logging.debug("huggingface: Loading json data for summarization") + with open(file_path, 'r') as file: + segments = json.load(file) - if results: - transcription_result = results[0] - json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') - with open(json_file_path, 'r') as file: - json_data = json.load(file) - - summary = transcription_result.get('summary', '') - - return json_data, summary, json_file_path, json_file_path.replace('.segments.json', '_summary.txt') + logging.debug("huggingface: Extracting text from the segments") + text = extract_text_from_segments(segments) + + API_TOKEN = huggingface_api_token + headers = {"Authorization": f"Bearer {API_TOKEN}"} + + logging.debug("huggingface: Creating query...") + data = {"inputs": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", "parameters": {"max_new_tokens": 4096}} + + API_URL = f"https://api-inference.huggingface.co/models/{huggingface_model}" + response = requests.post(API_URL, headers=headers, json=data) + + if response.status_code == 200: + summary = response.json()[0]['generated_text'].strip() # Inference API text-generation responses look like [{"generated_text": ...}] + logging.debug("huggingface: Summarization successful") + print("Summarization successful.") + return summary else: - return None, "No results found.", None, None + logging.debug("huggingface: Summarization failed") + print("Failed to process summary:", response.text) + return None except Exception as e: - error_message = f"An error occurred: {str(e)}" - return None, error_message, None, None + logging.debug("huggingface: Error in processing: %s", str(e)) + print("Error occurred while processing summary with huggingface:", str(e)) + return None + + + + def same_auth(username, password): + return username == password @@ -1142,7 +1184,7 @@ def process_transcription(json_data): fn=process_url, inputs=[ gr.components.Textbox(label="URL"), - gr.components.Dropdown(choices=["openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], label="API Name"), + gr.components.Dropdown(choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], label="API Name"), gr.components.Textbox(label="API Key"), gr.components.Number(value=2, label="Number of Speakers"), gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"), @@ -1161,64 +1203,8 @@ def process_transcription(json_data): allow_flagging="never" ) - -# FIXME - c/p from openai - only to be used when configured with Gradio for HF Space -def summarize_with_huggingface(api_key, file_path): - try: - logging.debug("openai:
Loading json data for summarization") - with open(file_path, 'r') as file: - segments = json.load(file) - - logging.debug("openai: Extracting text from the segments") - text = extract_text_from_segments(segments) - - headers = { - 'Authorization': f'Bearer {api_key}', - 'Content-Type': 'application/json' - } - - logging.debug("openai: Preparing data + prompt for submittal") - prompt_text = f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points" - data = { - "model": "CohereForAI/c4ai-command-r-plus", - "messages": [ - { - "role": "system", - "content": "You are a professional summarizer." - }, - { - "role": "user", - "content": prompt_text - } - ], - "max_tokens": 4096, # Adjust tokens as needed - "temperature": 0.7 - } - logging.debug("openai: Posting request") - response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=data) - - if response.status_code == 200: - summary = response.json()['choices'][0]['message']['content'].strip() - logging.debug("openai: Summarization successful") - print("Summarization successful.") - return summary - else: - logging.debug("openai: Summarization failed") - print("Failed to process summary:", response.text) - return None - except Exception as e: - logging.debug("openai: Error in processing: %s", str(e)) - print("Error occurred while processing summary with openai:", str(e)) - return None - - - - def same_auth(username, password): - return username == password - - - - iface.launch(auth=same_auth,share=True) + #iface.launch(auth=same_auth,share=False) + iface.launch() # # @@ -1235,6 +1221,8 @@ def same_auth(username, password): # def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False): + if input_path is None and args.user_interface: + return [] start_time = time.monotonic() paths = [] # Initialize paths as an empty list if os.path.isfile(input_path) and input_path.endswith('.txt'): @@ -1373,40 +1361,40 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model= if args.user_interface: launch_ui() else: - if args.input_path is None: + if not args.input_path: parser.print_help() sys.exit(1) - logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s') - - logging.info('Starting the transcription and summarization process.') - logging.info(f'Input path: {args.input_path}') - logging.info(f'API Name: {args.api_name}') - logging.debug(f'API Key: {args.api_key}') # ehhhhh - logging.info(f'Number of speakers: {args.num_speakers}') - logging.info(f'Whisper model: {args.whisper_model}') - logging.info(f'Offset: {args.offset}') - logging.info(f'VAD filter: {args.vad_filter}') - logging.info(f'Log Level: {args.log_level}') #lol - - if args.api_name and args.api_key: - logging.info(f'API: {args.api_name}') - logging.info('Summarization will be performed.') - else: - logging.info('No API specified. 
Summarization will not be performed.') + logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s') + + logging.info('Starting the transcription and summarization process.') + logging.info(f'Input path: {args.input_path}') + logging.info(f'API Name: {args.api_name}') + logging.debug(f'API Key: {args.api_key}') # ehhhhh + logging.info(f'Number of speakers: {args.num_speakers}') + logging.info(f'Whisper model: {args.whisper_model}') + logging.info(f'Offset: {args.offset}') + logging.info(f'VAD filter: {args.vad_filter}') + logging.info(f'Log Level: {args.log_level}') #lol + + if args.api_name and args.api_key: + logging.info(f'API: {args.api_name}') + logging.info('Summarization will be performed.') + else: + logging.info('No API specified. Summarization will not be performed.') - logging.debug("Platform check being performed...") - platform_check() - logging.debug("CUDA check being performed...") - cuda_check() - logging.debug("ffmpeg check being performed...") - check_ffmpeg() + logging.debug("Platform check being performed...") + platform_check() + logging.debug("CUDA check being performed...") + cuda_check() + logging.debug("ffmpeg check being performed...") + check_ffmpeg() - try: - results = main(args.input_path, api_name=args.api_name, api_key=args.api_key, num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset, vad_filter=args.vad_filter, download_video_flag=args.video) - logging.info('Transcription process completed.') - except Exception as e: - logging.error('An error occurred during the transcription process.') - logging.error(str(e)) - sys.exit(1) + try: + results = main(args.input_path, api_name=args.api_name, api_key=args.api_key, num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset, vad_filter=args.vad_filter, download_video_flag=args.video) + logging.info('Transcription process completed.') + except Exception as e: + logging.error('An error occurred during the transcription process.') + logging.error(str(e)) + sys.exit(1) diff --git a/Samples/compare/aoe-english.json b/tldw-original-scripts/Samples/compare/aoe-english.json similarity index 100% rename from Samples/compare/aoe-english.json rename to tldw-original-scripts/Samples/compare/aoe-english.json diff --git a/Samples/compare/aoe.yaml b/tldw-original-scripts/Samples/compare/aoe.yaml similarity index 100% rename from Samples/compare/aoe.yaml rename to tldw-original-scripts/Samples/compare/aoe.yaml diff --git a/Samples/compare/ufo-13b-english.json b/tldw-original-scripts/Samples/compare/ufo-13b-english.json similarity index 100% rename from Samples/compare/ufo-13b-english.json rename to tldw-original-scripts/Samples/compare/ufo-13b-english.json diff --git a/Samples/compare/ufo-13b.yaml b/tldw-original-scripts/Samples/compare/ufo-13b.yaml similarity index 100% rename from Samples/compare/ufo-13b.yaml rename to tldw-original-scripts/Samples/compare/ufo-13b.yaml diff --git a/Samples/compare/ufo-english.json b/tldw-original-scripts/Samples/compare/ufo-english.json similarity index 100% rename from Samples/compare/ufo-english.json rename to tldw-original-scripts/Samples/compare/ufo-english.json diff --git a/Samples/compare/ufo.yaml b/tldw-original-scripts/Samples/compare/ufo.yaml similarity index 100% rename from Samples/compare/ufo.yaml rename to tldw-original-scripts/Samples/compare/ufo.yaml diff --git a/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json 
b/tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json similarity index 100% rename from Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json rename to tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].diarize.json diff --git a/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json b/tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json similarity index 100% rename from Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json rename to tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].info.json diff --git a/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json b/tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json similarity index 100% rename from Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json rename to tldw-original-scripts/Samples/data/Elon Musk's BRUTALLY Honest Interview With Tucker Carlson (2023) [zaB_20bkoA4].summary.json diff --git "a/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" "b/tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" similarity index 100% rename from "Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" rename to "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].diarize.json" diff --git "a/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" "b/tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" similarity index 100% rename from "Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" rename to "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].info.json" diff --git "a/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" "b/tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" similarity index 100% rename from "Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" rename to "tldw-original-scripts/Samples/data/Elon Musk\357\274\232 A future worth getting excited about - TED - Tesla Texas Gigafactory interview [YRvf00NooN8].summary.json" diff --git a/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json 
b/tldw-original-scripts/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json similarity index 100% rename from Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json rename to tldw-original-scripts/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].diarize.json diff --git a/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json b/tldw-original-scripts/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json similarity index 100% rename from Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json rename to tldw-original-scripts/Samples/data/GRAND FINAL - 10,000 AoE2 Event (The Resurgence) [jnoxjLJind4].info.json diff --git "a/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" "b/tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" similarity index 100% rename from "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" rename to "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].diarize.json" diff --git "a/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" "b/tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" similarity index 100% rename from "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" rename to "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].info.json" diff --git "a/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" "b/tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" similarity index 100% rename from "Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" rename to "tldw-original-scripts/Samples/data/Sam Harris\357\274\232 Consciousness, Free Will, Psychedelics, AI, UFOs, and Meaning - Lex Fridman Podcast #185 [4dC_nRYIDZU].summary.json" diff --git a/Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json b/tldw-original-scripts/Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json similarity index 100% rename from Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json rename to tldw-original-scripts/Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].diarize.json diff --git a/Samples/data/Subcommittee on 
National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json b/tldw-original-scripts/Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json similarity index 100% rename from Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json rename to tldw-original-scripts/Samples/data/Subcommittee on National Security, the Border, and Foreign Affairs Hearing [KQ7Dw-739VY].info.json diff --git a/Samples/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson b/tldw-original-scripts/Samples/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson similarity index 100% rename from Samples/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson rename to tldw-original-scripts/Samples/interview_aoe-small-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017407.ndjson diff --git a/Samples/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson b/tldw-original-scripts/Samples/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson similarity index 100% rename from Samples/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson rename to tldw-original-scripts/Samples/interview_elon-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017434.ndjson diff --git a/Samples/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson b/tldw-original-scripts/Samples/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson similarity index 100% rename from Samples/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson rename to tldw-original-scripts/Samples/interview_lex-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017487.ndjson diff --git a/Samples/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson b/tldw-original-scripts/Samples/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson similarity index 100% rename from Samples/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson rename to tldw-original-scripts/Samples/interview_ufo-300_english_airoboros-l2-context_none_summary_jondurbin-airoboros-l2-13b-gpt4-1.4.1_1691017596.ndjson diff --git a/Samples/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/Samples/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson similarity index 100% rename from Samples/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/Samples/prepare_airoboros-l2-context-300_english_airoboros-l2-context.ndjson diff --git a/Samples/prepare_aoe-small-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/Samples/prepare_aoe-small-300_english_airoboros-l2-context.ndjson similarity index 100% rename from 
Samples/prepare_aoe-small-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/Samples/prepare_aoe-small-300_english_airoboros-l2-context.ndjson diff --git a/Samples/prepare_elon-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/Samples/prepare_elon-300_english_airoboros-l2-context.ndjson similarity index 100% rename from Samples/prepare_elon-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/Samples/prepare_elon-300_english_airoboros-l2-context.ndjson diff --git a/Samples/prepare_lex-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/Samples/prepare_lex-300_english_airoboros-l2-context.ndjson similarity index 100% rename from Samples/prepare_lex-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/Samples/prepare_lex-300_english_airoboros-l2-context.ndjson diff --git a/Samples/prepare_ufo-300_english_airoboros-l2-context.ndjson b/tldw-original-scripts/Samples/prepare_ufo-300_english_airoboros-l2-context.ndjson similarity index 100% rename from Samples/prepare_ufo-300_english_airoboros-l2-context.ndjson rename to tldw-original-scripts/Samples/prepare_ufo-300_english_airoboros-l2-context.ndjson diff --git a/Samples/prompts/airoboros-l2-context.txt b/tldw-original-scripts/Samples/prompts/airoboros-l2-context.txt similarity index 100% rename from Samples/prompts/airoboros-l2-context.txt rename to tldw-original-scripts/Samples/prompts/airoboros-l2-context.txt diff --git a/tldw-scripts/chunker.py b/tldw-original-scripts/chunker.py similarity index 100% rename from tldw-scripts/chunker.py rename to tldw-original-scripts/chunker.py diff --git a/tldw-scripts/compare-app.py b/tldw-original-scripts/compare-app.py similarity index 100% rename from tldw-scripts/compare-app.py rename to tldw-original-scripts/compare-app.py diff --git a/tldw-scripts/compare.py b/tldw-original-scripts/compare.py similarity index 100% rename from tldw-scripts/compare.py rename to tldw-original-scripts/compare.py diff --git a/tldw-scripts/merger.py b/tldw-original-scripts/merger.py similarity index 100% rename from tldw-scripts/merger.py rename to tldw-original-scripts/merger.py diff --git a/tldw-scripts/params/summary.json b/tldw-original-scripts/params/summary.json similarity index 100% rename from tldw-scripts/params/summary.json rename to tldw-original-scripts/params/summary.json diff --git a/tldw-scripts/pyannote.py b/tldw-original-scripts/pyannote.py similarity index 100% rename from tldw-scripts/pyannote.py rename to tldw-original-scripts/pyannote.py diff --git a/tldw-scripts/roller-chatgpt-v2.py b/tldw-original-scripts/roller-chatgpt-v2.py similarity index 100% rename from tldw-scripts/roller-chatgpt-v2.py rename to tldw-original-scripts/roller-chatgpt-v2.py diff --git a/tldw-scripts/roller-chatgpt.py b/tldw-original-scripts/roller-chatgpt.py similarity index 100% rename from tldw-scripts/roller-chatgpt.py rename to tldw-original-scripts/roller-chatgpt.py diff --git a/tldw-scripts/roller-exllama.py b/tldw-original-scripts/roller-exllama.py similarity index 100% rename from tldw-scripts/roller-exllama.py rename to tldw-original-scripts/roller-exllama.py diff --git a/tldw-scripts/roller-vllm.py b/tldw-original-scripts/roller-vllm.py similarity index 100% rename from tldw-scripts/roller-vllm.py rename to tldw-original-scripts/roller-vllm.py
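Note on the Hugging Face path added in the summarize.py hunks above: summarize_with_huggingface() posts the extracted transcript text to the serverless Inference API and reads the generated text back out of the JSON response. The sketch below is a minimal, standalone illustration of that exchange, not part of the patch; it assumes the configured model behaves as a text-generation endpoint (returning a list of {"generated_text": ...} objects) and that config.txt sits in the working directory, mirroring how summarize.py reads huggingface_api_token and huggingface_model.

# Minimal sketch of the Inference API exchange behind summarize_with_huggingface().
# Assumptions: a text-generation model is configured and config.txt is in the working directory.
import configparser
import requests

config = configparser.ConfigParser()
config.read('config.txt')
hf_token = config.get('API', 'huggingface_api_token', fallback=None)
hf_model = config.get('API', 'huggingface_model', fallback='CohereForAI/c4ai-command-r-plus')


def summarize_text(text):
    api_url = f"https://api-inference.huggingface.co/models/{hf_model}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    prompt = (f"{text}\n\nPlease provide a detailed, bulleted list of the points made "
              "throughout the transcribed video and any supporting arguments made for said points")
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 1024}}
    response = requests.post(api_url, headers=headers, json=payload)
    response.raise_for_status()
    # Assumption: a text-generation response arrives as [{"generated_text": "..."}]; other task types differ.
    return response.json()[0]["generated_text"].strip()

In practice the serverless API can also answer with a 503 while a model is still loading, so a small retry loop is usually worth adding around the POST.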
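The Gradio interface defined in summarize.py hands the dropdown selection straight to process_url(), and the four values that function returns (segments JSON, summary text, and the two file paths) feed the interface's output components. A hypothetical direct call to that same entry point, outside the UI, would look roughly like the following; the URL is a placeholder, and the import assumes summarize.py can be imported as a module without side effects, which may not hold for the script as written.

# Hypothetical direct use of the pipeline entry point that the Gradio UI wraps.
from summarize import process_url  # assumption: summarize.py is importable as a module

json_data, summary, segments_path, summary_path = process_url(
    "https://www.youtube.com/watch?v=EXAMPLE",  # placeholder video URL
    api_name="huggingface",   # dropdown choice added by this patch
    api_key="",               # the HF path reads huggingface_api_token from config.txt
    num_speakers=2,
    whisper_model="small.en",
    offset=0,
    vad_filter=False,
    download_video_flag=False,
)
print(summary or "No summary produced")
print("Segments JSON:", segments_path)
print("Summary file:", summary_path)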