glut23 · glut23 · May 27, 2024 · May 27, 2024
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -403,6 +403,50 @@ def test_malformed_start_timestamp(self):
             '01:00'
             )
 
+    def test_voice_span(self):
+        markup = '<v Homer Simpson>Hello there!</v>'
+        cue = Caption(text=markup)
+        self.assertEqual(cue.voice, 'Homer Simpson')
+        self.assertEqual((cue.text, cue.raw_text), ('Hello there!', markup))
+
+    def test_voice_span_with_classes(self):
+        # Class suffixes on the tag (".quiet.slow") must not leak into the
+        # voice name or the cleaned text.
+        markup = '<v.quiet.slow Lisa Simpson>I am Lisa</v>'
+        cue = Caption(text=markup)
+        self.assertEqual(cue.voice, 'Lisa Simpson')
+        self.assertEqual(cue.text, 'I am Lisa')
+        self.assertEqual(cue.raw_text, markup)
+
+    def test_voice_span_is_invalid(self):
+        # An unterminated "<v" tag is not a voice span: the text is left
+        # untouched and no voice is reported.
+        markup = '<v Lets eat donuts'
+        cue = Caption(text=markup)
+        self.assertIsNone(cue.voice)
+        self.assertEqual(cue.text, markup)
+        self.assertEqual(cue.raw_text, markup)
+
+    def test_voice_span_injected(self):
+        plain, voiced = 'This is a test', '<v Homer Simpson>I like tests</v>'
+        cue = Caption(text=plain)
+        self.assertEqual((cue.text, cue.raw_text), (plain, plain))
+        self.assertIsNone(cue.voice)
+        # Assigning new text must pick up the voice span it contains.
+        cue.text = voiced
+        self.assertEqual((cue.text, cue.raw_text), ('I like tests', voiced))
+        self.assertEqual(cue.voice, 'Homer Simpson')
+
+    def test_voice_span_removed(self):
+        voiced, plain = '<v Homer Simpson>I like tests</v>', 'This is a test'
+        cue = Caption(text=voiced)
+        self.assertEqual((cue.text, cue.raw_text), ('I like tests', voiced))
+        self.assertEqual(cue.voice, 'Homer Simpson')
+        # Replacing the text with plain content must clear the voice.
+        cue.text = plain
+        self.assertEqual((cue.text, cue.raw_text), (plain, plain))
+        self.assertIsNone(cue.voice)
+
 
 class TestStyle(unittest.TestCase):
 

diff --git a/tests/test_webvtt.py b/tests/test_webvtt.py
@@ -860,6 +860,61 @@ def test_can_parse_youtube_dl_files(self):
             vtt.captions[2].text
             )
 
+    def test_parse_voice_spans(self):
+        # Every cue opens with a voice tag; two also carry class suffixes
+        # that must not end up in the voice name.
+        source = textwrap.dedent("""
+            WEBVTT
+
+            00:00:00.000 --> 00:00:00.800
+            <v.quiet.slow Lisa Simpson>Knock knock</v>
+
+            00:00:02.100 --> 00:00:06.500
+            <v Homer Simpson>Who's there?</v>
+
+            00:00:10.530 --> 00:00:11.090
+            <v.loud Lisa Simpson>Atish</v>
+            """).strip()
+        vtt = webvtt.from_string(source)
+
+        # Expected (str(caption), caption.voice) pairs, in cue order.
+        expected = [
+            (
+                '00:00:00.000 00:00:00.800 Knock knock',
+                'Lisa Simpson',
+            ),
+            (
+                '00:00:02.100 00:00:06.500 Who\'s there?',
+                'Homer Simpson',
+            ),
+            (
+                '00:00:10.530 00:00:11.090 Atish',
+                'Lisa Simpson',
+            ),
+        ]
+
+        # The cue count is checked first, then each cue in order.
+        self.assertEqual(len(vtt), 3)
+        for position, (rendered, voice) in enumerate(expected):
+            caption = vtt[position]
+            self.assertEqual(str(caption), rendered)
+            self.assertEqual(caption.voice, voice)
+
+    def test_parse_caption_not_a_voice_span(self):
+        # An unterminated "<v" tag must survive parsing as plain cue text.
+        source = textwrap.dedent("""
+            WEBVTT
+
+            00:00:00.000 --> 00:00:00.800
+            <v Not an actual voice span here
+            """).strip()
+        vtt = webvtt.from_string(source)
+        self.assertEqual(len(vtt), 1)
+        only_cue = vtt[0]
+        rendered = '00:00:00.000 00:00:00.800 <v Not an actual voice span here'
+        self.assertIsNone(only_cue.voice)
+        self.assertEqual(str(only_cue), rendered)
+
 
 class TestParseSRT(unittest.TestCase):
 

diff --git a/webvtt/models.py b/webvtt/models.py
@@ -100,6 +100,7 @@ class Caption:
     """Representation of a caption."""
 
     CUE_TEXT_TAGS = re.compile('<.*?>')
+    VOICE_SPAN_PATTERN = re.compile(r'<v(?:\.\w+)*\s+([^>]+)>')
 
     def __init__(self,
                  start: typing.Optional[str] = None,
@@ -204,6 +205,16 @@ def text(self, value: str):
 
         self.lines = value.splitlines()
 
+    @property
+    def voice(self) -> typing.Optional[str]:
+        """Return the name in a leading ``<v ...>`` tag, or None."""
+        if not self.lines:
+            return None
+
+        # match() anchors at the start, so only a leading tag is accepted.
+        found = self.VOICE_SPAN_PATTERN.match(self.lines[0])
+        return found.group(1) if found else None
+
 
 class Style:
     """Representation of a style."""