forked from the-crypt-keeper/tldw
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added check for playlist and script to generate text file so you can DL/transcribe/summarize entire playlists. Also added first attempt at some tests
- Loading branch information
Showing
3 changed files
with
289 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import sys | ||
import yt_dlp | ||
|
||
def get_playlist_videos(playlist_url): | ||
ydl_opts = { | ||
'extract_flat': True, | ||
'skip_download': True, | ||
'quiet': True | ||
} | ||
|
||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: | ||
info = ydl.extract_info(playlist_url, download=False) | ||
|
||
if 'entries' in info: | ||
video_urls = [entry['url'] for entry in info['entries']] | ||
playlist_title = info['title'] | ||
return video_urls, playlist_title | ||
else: | ||
print("No videos found in the playlist.") | ||
return [], None | ||
|
||
def save_to_file(video_urls, filename): | ||
with open(filename, 'w') as file: | ||
file.write('\n'.join(video_urls)) | ||
print(f"Video URLs saved to {filename}") | ||
|
||
if __name__ == '__main__': | ||
if len(sys.argv) < 2: | ||
print("Please provide the playlist URL as a command-line argument.") | ||
sys.exit(1) | ||
|
||
playlist_url = sys.argv[1] | ||
video_urls, playlist_title = get_playlist_videos(playlist_url) | ||
|
||
if video_urls: | ||
filename = f"{playlist_title}.txt" | ||
save_to_file(video_urls, filename) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
import os | ||
import shutil | ||
import tempfile | ||
import unittest | ||
from unittest.mock import patch, MagicMock | ||
|
||
# Run this: python -m unittest test_transcription_summarization.py | ||
""" | ||
1. Test error handling: | ||
You mentioned testing error handling in the comments, but there are no specific tests for it in the script. Consider adding tests that intentionally raise exceptions or simulate error conditions to ensure that the script handles them gracefully. Use assertRaises to check if the expected exceptions are raised. | ||
2. Test convert_to_wav: | ||
In the test_convert_to_wav test, you can enhance it by creating a temporary video file using a library like cv2 or moviepy and then testing the conversion with the actual file. This will make the test more comprehensive. | ||
3. Test summarization functions: | ||
You have a test for summarize_with_openai, but there are no tests for other summarization functions like summarize_with_claude, summarize_with_cohere, etc. Consider adding tests for each summarization function to ensure they work as expected. | ||
4. Parameterized tests: | ||
You mentioned using parameterized tests in the comments, but the current script doesn't include any. Consider using the @parameterized decorator from the parameterized library to create parameterized tests. This allows you to test the script with different input paths, API names, models, and configurations without duplicating test code. | ||
5. Integration tests: | ||
The test_main function is a good example of an integration test. Consider adding more integration tests that cover different scenarios and combinations of input paths, API names, and configurations to ensure the script works end-to-end. | ||
6. Test coverage: | ||
Use a test coverage tool like coverage to measure the test coverage of your script. This helps identify areas that may require additional testing. You can run the tests with coverage and generate a coverage report to see which lines of code are covered by the tests. | ||
7. Naming conventions: | ||
Follow consistent naming conventions for test methods and variables. For example, use test_ prefix for test method names and mock_ prefix for mocked objects. | ||
8. Docstrings and comments: | ||
Add docstrings to each test method to describe what the test is verifying. This improves the readability and maintainability of the test script. You can also add comments to explain complex test setups or assertions. | ||
9. Integration tests: | ||
Write integration tests that cover the entire flow of the script, from processing the input path to generating the summary. | ||
Use a combination of real and mocked dependencies to test the integration between different components. | ||
10. Parameterized tests: | ||
Use parameterized tests to test the script with different input paths, API names, models, and other configurations. | ||
This allows you to cover a wide range of scenarios without duplicating test code. | ||
""" | ||
|
||
# Import the necessary functions and classes from your script | ||
from diarize import ( | ||
read_paths_from_file, | ||
process_path, | ||
process_local_file, | ||
create_download_directory, | ||
normalize_title, | ||
get_youtube, | ||
download_video, | ||
convert_to_wav, | ||
speech_to_text, | ||
summarize_with_openai, | ||
summarize_with_claude, | ||
summarize_with_cohere, | ||
summarize_with_groq, | ||
summarize_with_llama, | ||
summarize_with_oobabooga, | ||
save_summary_to_file, | ||
main | ||
) | ||
|
||
class TestTranscriptionSummarization(unittest.TestCase): | ||
def setUp(self): | ||
self.temp_dir = tempfile.mkdtemp() | ||
|
||
def tearDown(self): | ||
shutil.rmtree(self.temp_dir) | ||
|
||
def test_read_paths_from_file(self): | ||
# Create a temporary file with sample paths | ||
file_path = os.path.join(self.temp_dir, 'paths.txt') | ||
with open(file_path, 'w') as file: | ||
file.write('path1\npath2\npath3') | ||
|
||
# Call the function and check the returned paths | ||
paths = read_paths_from_file(file_path) | ||
self.assertEqual(paths, ['path1', 'path2', 'path3']) | ||
|
||
|
||
|
||
@patch('diarize.process_local_file') | ||
def test_process_local_file(self, mock_process_local_file): | ||
mock_process_local_file.return_value = ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav') | ||
result = process_path('/path/to/local/video.mp4') | ||
self.assertEqual(result, ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav')) | ||
|
||
|
||
|
||
def test_normalize_title(self): | ||
title = 'Video Title / with \\ Special: Characters*' | ||
normalized_title = normalize_title(title) | ||
self.assertEqual(normalized_title, 'Video Title _ with _ Special_ Characters') | ||
|
||
|
||
|
||
@patch('diarize.subprocess.run') | ||
def test_convert_to_wav(self, mock_subprocess_run): | ||
video_file_path = '/path/to/video.mp4' | ||
audio_file_path = convert_to_wav(video_file_path) | ||
self.assertEqual(audio_file_path, '/path/to/video.wav') | ||
mock_subprocess_run.assert_called_once() | ||
|
||
|
||
|
||
@patch('diarize.process_local_file') | ||
def test_process_path(self, mock_process_local_file, mock_get_youtube): | ||
# Test processing a URL | ||
mock_get_youtube.return_value = {'title': 'Video Title'} | ||
result = process_path('https://example.com/video.mp4') | ||
self.assertEqual(result, {'title': 'Video Title'}) | ||
|
||
# Test processing a local file | ||
mock_process_local_file.return_value = ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav') | ||
result = process_path('/path/to/local/video.mp4') | ||
self.assertEqual(result, ('/path/to/download', {'title': 'Local Video'}, '/path/to/audio.wav')) | ||
|
||
|
||
|
||
def test_speech_to_text(self): | ||
audio_file_path = '/path/to/audio.wav' | ||
segments = speech_to_text(audio_file_path) | ||
self.assertIsInstance(segments, list) | ||
self.assertTrue(len(segments) > 0) | ||
self.assertIn('start', segments[0]) | ||
self.assertIn('end', segments[0]) | ||
self.assertIn('text', segments[0]) | ||
|
||
|
||
|
||
@patch('diarize.requests.post') | ||
def test_summarize_with_openai(self, mock_post): | ||
mock_response = MagicMock() | ||
mock_response.status_code = 200 | ||
mock_response.json.return_value = {'choices': [{'message': {'content': 'Summary'}}]} | ||
mock_post.return_value = mock_response | ||
|
||
summary = summarize_with_openai('api_key', '/path/to/audio.wav.segments.json', 'gpt-4-turbo') | ||
self.assertEqual(summary, 'Summary') | ||
|
||
|
||
|
||
def test_integration_local_file(self): | ||
# Create a temporary video file | ||
video_file_path = os.path.join(self.temp_dir, 'video.mp4') | ||
with open(video_file_path, 'wb') as file: | ||
file.write(b'dummy video content') | ||
|
||
# Call the main function with the local file path | ||
results = main(video_file_path) | ||
|
||
# Check the expected results | ||
self.assertEqual(len(results), 1) | ||
self.assertEqual(results[0]['video_path'], video_file_path) | ||
self.assertIsNotNone(results[0]['audio_file']) | ||
self.assertIsInstance(results[0]['transcription'], list) | ||
|
||
|
||
|
||
def test_save_summary_to_file(self): | ||
summary = 'This is a summary.' | ||
file_path = '/path/to/audio.wav.segments.json' | ||
save_summary_to_file(summary, file_path) | ||
summary_file_path = file_path. self.assertTrue(os.path.exists(summary_file_path)) | ||
with open(summary_file_path, 'r') as file: | ||
content = file.read() | ||
self.assertEqual(content, summary) | ||
|
||
|
||
|
||
@patch('diarize.get_youtube') | ||
@patch('diarize.download_video') | ||
@patch('diarize.convert_to_wav') | ||
@patch('diarize.speech_to_text') | ||
@patch('diarize.summarize_with_openai') | ||
def test_main(self, mock_summarize, mock_speech_to_text, mock_convert_to_wav, mock_download_video, mock_get_youtube): | ||
# Set up mock return values | ||
mock_get_youtube.return_value = {'title': 'Video Title'} | ||
mock_download_video.return_value = '/path/to/video.mp4' | ||
mock_convert_to_wav.return_value = '/path/to/audio.wav' | ||
mock_speech_to_text.return_value = [{'start': 0, 'end': 5, 'text': 'Hello'}] | ||
mock_summarize.return_value = 'This is a summary.' | ||
|
||
# Call the main function with sample arguments | ||
results = main('https://example.com/video.mp4', api_name='openai', api_key='api_key') | ||
|
||
# Check the expected results | ||
self.assertEqual(len(results), 1) | ||
self.assertEqual(results[0]['video_path'], 'https://example.com/video.mp4') | ||
self.assertEqual(results[0]['audio_file'], '/path/to/audio.wav') | ||
self.assertEqual(results[0]['transcription'], [{'start': 0, 'end': 5, 'text': 'Hello'}]) | ||
self.assertEqual(results[0]['summary'], 'This is a summary.') | ||
|
||
# Check that the expected functions were called with the correct arguments | ||
mock_get_youtube.assert_called_once_with('https://example.com/video.mp4') | ||
mock_download_video.assert_called_once() | ||
mock_convert_to_wav.assert_called_once() | ||
mock_speech_to_text.assert_called_once() | ||
mock_summarize.assert_called_once_with('api_key', '/path/to/audio.wav.segments.json', 'gpt-4-turbo') | ||
|
||
# Add more test methods for other functions... | ||
|
||
if __name__ == '__main__': | ||
unittest.main() |