From 873c4d154990cdadcb3a4a38d4a1141fd41b8e16 Mon Sep 17 00:00:00 2001 From: Alejandro Mendez Date: Mon, 27 May 2024 11:28:00 +0200 Subject: [PATCH] Add voice span support #55 --- tests/test_models.py | 44 +++++++++++++++++++++++++++++++++++ tests/test_webvtt.py | 55 ++++++++++++++++++++++++++++++++++++++++++++ webvtt/models.py | 11 +++++++++ 3 files changed, 110 insertions(+) diff --git a/tests/test_models.py b/tests/test_models.py index 2210e57..c99aac6 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -403,6 +403,50 @@ def test_malformed_start_timestamp(self): '01:00' ) + def test_voice_span(self): + caption = Caption(text='Hello there!') + self.assertEqual(caption.text, 'Hello there!') + self.assertEqual(caption.raw_text, 'Hello there!') + self.assertEqual(caption.voice, 'Homer Simpson') + + def test_voice_span_with_classes(self): + caption = Caption(text='I am Lisa') + self.assertEqual(caption.text, 'I am Lisa') + self.assertEqual( + caption.raw_text, + 'I am Lisa' + ) + self.assertEqual(caption.voice, 'Lisa Simpson') + + def test_voice_span_is_invalid(self): + caption = Caption(text='I like tests') + self.assertEqual(caption.voice, 'Homer Simpson') + + def test_voice_span_removed(self): + caption = Caption(text='I like tests') + self.assertEqual(caption.text, 'I like tests') + self.assertEqual(caption.raw_text, 'I like tests') + self.assertEqual(caption.voice, 'Homer Simpson') + caption.text = 'This is a test' + self.assertEqual(caption.text, 'This is a test') + self.assertEqual(caption.raw_text, 'This is a test') + self.assertIsNone(caption.voice) + class TestStyle(unittest.TestCase): diff --git a/tests/test_webvtt.py b/tests/test_webvtt.py index 613887e..14253dc 100644 --- a/tests/test_webvtt.py +++ b/tests/test_webvtt.py @@ -860,6 +860,61 @@ def test_can_parse_youtube_dl_files(self): vtt.captions[2].text ) + def test_parse_voice_spans(self): + vtt = webvtt.from_string(textwrap.dedent(""" + WEBVTT + + 00:00:00.000 --> 00:00:00.800 + Knock knock + + 00:00:02.100 --> 00:00:06.500 + Who's there? + + 00:00:10.530 --> 00:00:11.090 + Atish + """).strip() + ) + self.assertEqual(len(vtt), 3) + self.assertEqual( + str(vtt[0]), + '00:00:00.000 00:00:00.800 Knock knock' + ) + self.assertEqual( + vtt[0].voice, + 'Lisa Simpson' + ) + self.assertEqual( + str(vtt[1]), + '00:00:02.100 00:00:06.500 Who\'s there?' + ) + self.assertEqual( + vtt[1].voice, + 'Homer Simpson' + ) + self.assertEqual( + str(vtt[2]), + '00:00:10.530 00:00:11.090 Atish' + ) + self.assertEqual( + vtt[2].voice, + 'Lisa Simpson' + ) + + def test_parse_caption_not_a_voice_span(self): + vtt = webvtt.from_string(textwrap.dedent(""" + WEBVTT + + 00:00:00.000 --> 00:00:00.800 + ') + VOICE_SPAN_PATTERN = re.compile(r']+)>') def __init__(self, start: typing.Optional[str] = None, @@ -204,6 +205,16 @@ def text(self, value: str): self.lines = value.splitlines() + @property + def voice(self) -> typing.Optional[str]: + """Return the voice span if present.""" + if self.lines and self.lines[0].startswith('