Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add voice span support #55 #66

Merged
merged 1 commit into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,50 @@ def test_malformed_start_timestamp(self):
'01:00'
)

def test_voice_span(self):
    """A plain voice span yields clean text, intact raw text and a voice."""
    raw = '<v Homer Simpson>Hello there!</v>'
    cue = Caption(text=raw)

    self.assertEqual(cue.text, 'Hello there!')
    # The raw text must keep the original markup untouched.
    self.assertEqual(cue.raw_text, raw)
    self.assertEqual(cue.voice, 'Homer Simpson')

def test_voice_span_with_classes(self):
    """Classes attached to the voice tag do not leak into text or voice."""
    raw = '<v.quiet.slow Lisa Simpson>I am Lisa</v>'
    cue = Caption(text=raw)

    self.assertEqual(cue.text, 'I am Lisa')
    self.assertEqual(cue.raw_text, raw)
    self.assertEqual(cue.voice, 'Lisa Simpson')

def test_voice_span_is_invalid(self):
    """An unterminated '<v' tag is kept verbatim and reports no voice."""
    raw = '<v Lets eat donuts'
    cue = Caption(text=raw)

    # With no closing '>', the text is expected to come back unchanged.
    self.assertEqual(cue.text, raw)
    self.assertEqual(cue.raw_text, raw)
    self.assertIsNone(cue.voice)

def test_voice_span_injected(self):
    """Assigning voiced text to a plain caption makes the voice appear."""
    plain = 'This is a test'
    voiced = '<v Homer Simpson>I like tests</v>'

    cue = Caption(text=plain)
    self.assertEqual(cue.text, plain)
    self.assertEqual(cue.raw_text, plain)
    self.assertIsNone(cue.voice)

    # Replace the text through the setter and check the caption again.
    cue.text = voiced
    self.assertEqual(cue.text, 'I like tests')
    self.assertEqual(cue.raw_text, voiced)
    self.assertEqual(cue.voice, 'Homer Simpson')

def test_voice_span_removed(self):
    """Assigning plain text to a voiced caption clears the voice."""
    voiced = '<v Homer Simpson>I like tests</v>'
    plain = 'This is a test'

    cue = Caption(text=voiced)
    self.assertEqual(cue.text, 'I like tests')
    self.assertEqual(cue.raw_text, voiced)
    self.assertEqual(cue.voice, 'Homer Simpson')

    # Replace the text through the setter and check the caption again.
    cue.text = plain
    self.assertEqual(cue.text, plain)
    self.assertEqual(cue.raw_text, plain)
    self.assertIsNone(cue.voice)


class TestStyle(unittest.TestCase):

Expand Down
55 changes: 55 additions & 0 deletions tests/test_webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,61 @@ def test_can_parse_youtube_dl_files(self):
vtt.captions[2].text
)

def test_parse_voice_spans(self):
    """Voice spans in parsed cues are stripped from str() and exposed."""
    source = textwrap.dedent("""
        WEBVTT

        00:00:00.000 --> 00:00:00.800
        <v.quiet.slow Lisa Simpson>Knock knock</v>

        00:00:02.100 --> 00:00:06.500
        <v Homer Simpson>Who's there?</v>

        00:00:10.530 --> 00:00:11.090
        <v.loud Lisa Simpson>Atish</v>
        """).strip()
    vtt = webvtt.from_string(source)

    # One (rendered caption, speaker) pair per cue, in file order.
    expected = (
        ('00:00:00.000 00:00:00.800 Knock knock', 'Lisa Simpson'),
        ('00:00:02.100 00:00:06.500 Who\'s there?', 'Homer Simpson'),
        ('00:00:10.530 00:00:11.090 Atish', 'Lisa Simpson'),
    )

    self.assertEqual(len(vtt), 3)
    for index, (rendered, speaker) in enumerate(expected):
        self.assertEqual(str(vtt[index]), rendered)
        self.assertEqual(vtt[index].voice, speaker)

def test_parse_caption_not_a_voice_span(self):
    """A cue starting with an unterminated '<v' is parsed as plain text."""
    source = textwrap.dedent("""
        WEBVTT

        00:00:00.000 --> 00:00:00.800
        <v Not an actual voice span here
        """).strip()
    vtt = webvtt.from_string(source)

    self.assertEqual(len(vtt), 1)
    only_cue = vtt[0]
    self.assertEqual(
        str(only_cue),
        '00:00:00.000 00:00:00.800 <v Not an actual voice span here'
    )
    self.assertIsNone(only_cue.voice)


class TestParseSRT(unittest.TestCase):

Expand Down
11 changes: 11 additions & 0 deletions webvtt/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class Caption:
"""Representation of a caption."""

# Matches a single cue text tag such as <b>, </v> or <c.classname>; the
# non-greedy quantifier keeps each match limited to one tag.
CUE_TEXT_TAGS = re.compile('<.*?>')
# Matches a voice span start tag and captures its annotation (the speaker
# name): '<v', optional '.class' parts, whitespace, then the annotation up to
# the closing '>'. A class name may hold any character other than
# whitespace, '.' and '>', so hyphenated classes such as
# '<v.first-class Name>' are accepted as well.
VOICE_SPAN_PATTERN = re.compile(r'<v(?:\.[^\s.>]+)*\s+([^>]+)>')

def __init__(self,
start: typing.Optional[str] = None,
Expand Down Expand Up @@ -204,6 +205,16 @@ def text(self, value: str):

self.lines = value.splitlines()

@property
def voice(self) -> typing.Optional[str]:
    """Return the speaker named by a leading voice span, if present.

    Only the first caption line is inspected, and the voice tag must sit
    at the very start of that line.

    :returns: the voice annotation with surrounding whitespace removed,
        or None when the caption has no lines, the first line does not
        start with a well-formed voice tag, or the annotation is blank.
    """
    if not self.lines:
        return None

    # Pattern.match only matches at the beginning of the string, so lines
    # that do not open with a '<v ...>' tag are rejected here without a
    # separate prefix check.
    match = self.VOICE_SPAN_PATTERN.match(self.lines[0])
    if match is None:
        return None

    # Whitespace before the closing '>' is not part of the speaker name;
    # an annotation that is only whitespace names nobody.
    return match.group(1).strip() or None


class Style:
"""Representation of a style."""
Expand Down
Loading