From c060389d457da4474dea10c61d8d94249a17cb80 Mon Sep 17 00:00:00 2001 From: Bryan Anderson Date: Thu, 9 Jan 2025 02:33:05 +0000 Subject: [PATCH] Explicitly specify protocol (http, ws, grpc) when calling tts(); include backward compatibility --- README.md | 31 ++++----- pyht/async_client.py | 27 ++++---- pyht/client.py | 44 ++++++------ pyht/utils.py | 159 ++++++++++++++++++++++++++++++++++++------- 4 files changed, 188 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index ee3ff23..9521108 100644 --- a/README.md +++ b/README.md @@ -79,17 +79,14 @@ The `tts` method takes the following arguments: - `text`: The text to be converted to speech; a string or list of strings. - `options`: The options to use for the TTS request; a `TTSOptions` object [(see below)](#ttsoptions). - `voice_engine`: The voice engine to use for the TTS request; a string (default `Play3.0-mini-http`). - - `PlayDialog-*`: Our large, expressive English model, which also supports multi-turn two-speaker dialogues. - - `PlayDialog-http`: Streaming and non-streaming audio over HTTP. - - `PlayDialog-ws`: Streaming audio over WebSockets. - - `PlayDialogMultilingual-*`: Our large, expressive multilingual model, which also supports multi-turn two-speaker dialogues. - - `PlayDialogMultilingual-http`: Streaming and non-streaming audio over HTTP. - - `PlayDialogMultilingual-ws`: Streaming audio over WebSockets. - - `Play3.0-mini-*`: Our small, fast multilingual model. - - `Play3.0-mini-http`: Streaming and non-streaming audio over HTTP. - - `Play3.0-mini-ws`: Streaming audio over WebSockets. - - `Play3.0-mini-grpc`: Streaming audio over gRPC. NOTE: This voice engine is ONLY available for Play On-Prem customers. - - `PlayHT2.0-turbo`: Our legacy English-only model, streaming audio over gRPC. + - `PlayDialog`: Our large, expressive English model, which also supports multi-turn two-speaker dialogues. + - `PlayDialogMultilingual`: Our large, expressive multilingual model, which also supports multi-turn two-speaker dialogues. + - `Play3.0-mini`: Our small, fast multilingual model. + - `PlayHT2.0-turbo`: Our legacy English-only model +- `protocol`: The protocol to use to communicate with the Play API (`http` by default except for `PlayHT2.0-turbo` which is `grpc` by default). + - `http`: Streaming and non-streaming audio over HTTP (supports `Play3.0-mini`, `PlayDialog`, and `PlayDialogMultilingual`). + - `ws`: Streaming audio over WebSockets (supports `Play3.0-mini`, `PlayDialog`, and `PlayDialogMultilingual`). + - `grpc`: Streaming audio over gRPC (supports `PlayHT2.0-turbo` for all, and `Play3.0-mini` ONLY for Play On-Prem customers). - `streaming`: Whether or not to stream the audio in chunks (default True); non-streaming is only enabled for HTTP endpoints. ### TTSOptions @@ -117,12 +114,12 @@ The `TTSOptions` class is used to specify the options for the TTS request. It ha - The following options are inference-time hyperparameters of the text-to-speech model; if unset, the model will use default values chosen by Play. - `temperature` (all models): The temperature of the model, a float. - `top_p` (all models): The top_p of the model, a float. - - `text_guidance` (`Play3.0-mini-*` and `PlayHT2.0-turbo` only): The text_guidance of the model, a float. - - `voice_guidance` (`Play3.0-mini-*` and `PlayHT2.0-turbo` only): The voice_guidance of the model, a float. - - `style_guidance` (`Play3.0-mini-*` only): The style_guidance of the model, a float. - - `repetition_penalty` (`Play3.0-mini-*` and `PlayHT2.0-turbo` only): The repetition_penalty of the model, a float. + - `text_guidance` (`Play3.0-mini` and `PlayHT2.0-turbo` only): The text_guidance of the model, a float. + - `voice_guidance` (`Play3.0-mini` and `PlayHT2.0-turbo` only): The voice_guidance of the model, a float. + - `style_guidance` (`Play3.0-mini` only): The style_guidance of the model, a float. + - `repetition_penalty` (`Play3.0-mini` and `PlayHT2.0-turbo` only): The repetition_penalty of the model, a float. - `disable_stabilization` (`PlayHT2.0-turbo` only): Disable the audio stabilization process, a boolean (default `False`). -- `language` (`Play3.0-*` and `PlayDialogMultilingual-*` only): The language of the text to be spoken, a `Language` enum value or `None` (default `ENGLISH`). +- `language` (`Play3.0` and `PlayDialogMultilingual` only): The language of the text to be spoken, a `Language` enum value or `None` (default `ENGLISH`). - `AFRIKAANS` - `ALBANIAN` - `AMHARIC` @@ -160,7 +157,7 @@ The `TTSOptions` class is used to specify the options for the TTS request. It ha - `UKRAINIAN` - `URDU` - `XHOSA` -- The following options are additional inference-time hyperparameters which only apply to the `PlayDialog-*` and `PlayDialogMultilingual-*` models; if unset, the model will use default values chosen by Play. +- The following options are additional inference-time hyperparameters which only apply to the `PlayDialog` and `PlayDialogMultilingual` models; if unset, the model will use default values chosen by Play. - `voice_2` (multi-turn dialogue only): The second voice to use for a multi-turn TTS request; a string. - A URL pointing to a Play voice manifest file. - `turn_prefix` (multi-turn dialogue only): The prefix for the first speaker's turns in a multi-turn TTS request; a string. diff --git a/pyht/async_client.py b/pyht/async_client.py index 72a02e7..444f0ab 100644 --- a/pyht/async_client.py +++ b/pyht/async_client.py @@ -230,6 +230,8 @@ async def stream_tts_input( text_stream: Union[AsyncGenerator[str, None], AsyncIterable[str]], options: TTSOptions, voice_engine: Optional[str] = None, + protocol: Optional[str] = None, + streaming: bool = True ): """Stream input to Play via the text_stream object.""" buffer = io.StringIO() @@ -239,12 +241,12 @@ async def stream_tts_input( buffer.write(" ") # normalize word spacing. if SENTENCE_END_REGEX.match(t) is None: continue - async for data in self.tts(buffer.getvalue(), options, voice_engine): + async for data in self.tts(buffer.getvalue(), options, voice_engine, protocol, streaming): yield data buffer = io.StringIO() # If text_stream closes, send all remaining text, regardless of sentence structure. if buffer.tell() > 0: - async for data in self.tts(buffer.getvalue(), options, voice_engine): + async for data in self.tts(buffer.getvalue(), options, voice_engine, protocol, streaming): yield data def tts( @@ -252,24 +254,23 @@ def tts( text: Union[str, list[str]], options: TTSOptions, voice_engine: Optional[str] = None, + protocol: Optional[str] = None, streaming: bool = True ) -> AsyncIterable[bytes]: metrics = self._telemetry.start("tts-request") try: - voice_engine, protocol = get_voice_engine_and_protocol(voice_engine) + voice_engine, protocol = get_voice_engine_and_protocol(voice_engine, protocol) if protocol == "http": return self._tts_http(text, options, voice_engine, metrics, streaming) elif protocol == "ws": - if streaming: - return self._tts_ws(text, options, voice_engine, metrics) - else: + if not streaming: raise ValueError("Non-streaming is not supported for WebSocket API") + return self._tts_ws(text, options, voice_engine, metrics) elif protocol == "grpc": - if streaming: - return self._tts_grpc(text, options, voice_engine, metrics) - else: + if not streaming: raise ValueError("Non-streaming is not supported for gRPC API") + return self._tts_grpc(text, options, voice_engine, metrics) else: raise ValueError(f"Unknown protocol {protocol}") except Exception as e: @@ -489,7 +490,8 @@ async def _tts_ws( def get_stream_pair( self, options: TTSOptions, - voice_engine: Optional[str] = None + voice_engine: Optional[str] = None, + protocol: Optional[str] = None ) -> tuple['_InputStream', '_OutputStream']: """Get a linked pair of (input, output) streams. @@ -498,7 +500,7 @@ def get_stream_pair( """ shared_q = asyncio.Queue() return ( - _InputStream(self, options, shared_q, voice_engine), + _InputStream(self, options, shared_q, voice_engine, protocol), _OutputStream(shared_q) ) @@ -587,11 +589,12 @@ def __init__( options: TTSOptions, q: asyncio.Queue[Optional[bytes]], voice_engine: Optional[str], + protocol: Optional[str] = None ): self._input = TextStream() async def listen(): - async for output in client.stream_tts_input(self._input, options, voice_engine): + async for output in client.stream_tts_input(self._input, options, voice_engine, protocol): await q.put(output) await q.put(None) diff --git a/pyht/client.py b/pyht/client.py index ec54ffc..0f8cb42 100644 --- a/pyht/client.py +++ b/pyht/client.py @@ -64,7 +64,7 @@ class HTTPFormat(Enum): FORMAT_PCM = "pcm" -# PlayDialog-* and PlayDialogMultilingual-* only +# PlayDialog and PlayDialogMultilingual only class CandidateRankingMethod(Enum): # non-streaming only DescriptionASRWithMeanProbRank = "description_asr_with_mean_prob" @@ -185,21 +185,21 @@ class TTSOptions: temperature: Optional[float] = None top_p: Optional[float] = None - # only applies to Play3.0-* and PlayHT2.0-turbo + # only apply to Play3.0 and PlayHT2.0-turbo text_guidance: Optional[float] = None voice_guidance: Optional[float] = None repetition_penalty: Optional[float] = None - # only applies to Play3.0-* + # only applies to Play3.0 style_guidance: Optional[float] = None - # only applies to PlayHT2.0-* + # only applies to PlayHT2.0 disable_stabilization: Optional[bool] = None - # only applies to Play3.0-* and PlayDialogMultilingual-* + # only applies to Play3.0 and PlayDialogMultilingual language: Optional[Language] = None - # only applies to PlayDialog-* and PlayDialogMultilingual-* + # only apply to PlayDialog and PlayDialogMultilingual # leave the _2 params None if generating single-speaker audio voice_2: Optional[str] = None turn_prefix: Optional[str] = None @@ -293,7 +293,7 @@ def http_prepare_dict(text: List[str], options: TTSOptions, voice_engine: str) - "language": options.language.value if options.language is not None else None, "version": version, - # PlayDialog-* and PlayDialogMultilingual-* + # PlayDialog and PlayDialogMultilingual # leave the _2 params None if generating single-speaker audio "voice_2": options.voice_2, "turn_prefix": options.turn_prefix, @@ -506,7 +506,9 @@ def stream_tts_input( self, text_stream: Union[Generator[str, None, None], Iterable[str]], options: TTSOptions, - voice_engine: Optional[str] = None + voice_engine: Optional[str] = None, + protocol: Optional[str] = None, + streaming: bool = True ) -> Iterable[bytes]: """Stream input to Play.ht via the text_stream object.""" buffer = io.StringIO() @@ -516,35 +518,34 @@ def stream_tts_input( buffer.write(" ") # normalize word spacing. if SENTENCE_END_REGEX.match(t) is None: continue - yield from self.tts(buffer.getvalue(), options, voice_engine) + yield from self.tts(buffer.getvalue(), options, voice_engine, protocol, streaming) buffer = io.StringIO() # If text_stream closes, send all remaining text, regardless of sentence structure. if buffer.tell() > 0: - yield from self.tts(buffer.getvalue(), options, voice_engine) + yield from self.tts(buffer.getvalue(), options, voice_engine, protocol, streaming) def tts( self, text: Union[str, List[str]], options: TTSOptions, voice_engine: Optional[str] = None, + protocol: Optional[str] = None, streaming: bool = True ) -> Iterable[bytes]: metrics = self._telemetry.start("tts-request") try: - voice_engine, protocol = get_voice_engine_and_protocol(voice_engine) + voice_engine, protocol = get_voice_engine_and_protocol(voice_engine, protocol) if protocol == "http": return self._tts_http(text, options, voice_engine, metrics, streaming) elif protocol == "ws": - if streaming: - return self._tts_ws(text, options, voice_engine, metrics) - else: + if not streaming: raise ValueError("Non-streaming is not supported for WebSocket API") + return self._tts_ws(text, options, voice_engine, metrics) elif protocol == "grpc": - if streaming: - return self._tts_grpc(text, options, voice_engine, metrics) - else: + if not streaming: raise ValueError("Non-streaming is not supported for gRPC API") + return self._tts_grpc(text, options, voice_engine, metrics) else: raise ValueError(f"Unknown protocol {protocol}") except Exception as e: @@ -757,7 +758,8 @@ def _tts_ws( def get_stream_pair( self, options: TTSOptions, - voice_engine: Optional[str] = None + voice_engine: Optional[str] = None, + protocol: Optional[str] = None ) -> Tuple['_InputStream', '_OutputStream']: """Get a linked pair of (input, output) streams. @@ -765,7 +767,7 @@ def get_stream_pair( """ shared_q = queue.Queue() return ( - _InputStream(self, options, shared_q, voice_engine), + _InputStream(self, options, shared_q, voice_engine, protocol), _OutputStream(shared_q) ) @@ -818,11 +820,11 @@ class _InputStream: input_stream.done() """ def __init__(self, client: Client, options: TTSOptions, q: queue.Queue[Optional[bytes]], - voice_engine: Optional[str]): + voice_engine: Optional[str], protocol: Optional[str] = None): self._input = TextStream() def listen(): - for output in client.stream_tts_input(self._input, options, voice_engine): + for output in client.stream_tts_input(self._input, options, voice_engine, protocol): q.put(output) q.put(None) diff --git a/pyht/utils.py b/pyht/utils.py index 02d1d7b..fc9778c 100644 --- a/pyht/utils.py +++ b/pyht/utils.py @@ -15,30 +15,143 @@ def prepare_text(text: Union[str, List[str]], remove_ssml_tags: bool = True) -> return text -def get_voice_engine_and_protocol(voice_engine: Optional[str]) -> Tuple[str, str]: - if voice_engine is None: - logging.warning("No voice engine specified; using Play3.0-mini-http") - voice_engine = "Play3.0-mini" - protocol = "http" +def _convert_deprecated_voice_engine(voice_engine: str, protocol: Optional[str]) -> Tuple[str, str]: + _voice_engine, _protocol = voice_engine.rsplit("-", 1) + if not protocol or protocol == _protocol: + logging.warning(f"Voice engine {_voice_engine}-{_protocol} is deprecated; \ + separately pass voice_engine='{_voice_engine}' and protocol='{_protocol}'.") + return _voice_engine, _protocol + else: + raise ValueError(f"Got voice engine of deprecated format {voice_engine} \ + as well as mismatched protocol {protocol}.") + + +def get_voice_engine_and_protocol(voice_engine: Optional[str], protocol: Optional[str]) -> Tuple[str, str]: + if protocol and protocol not in ["http", "ws", "grpc"]: + raise ValueError(f"Invalid protocol: {protocol} (must be http, ws, or grpc).") + + # this is a bunch of tedious backward compatibility + + if not voice_engine: + if not protocol: + logging.warning("No voice engine or protocol specified; using Play3.0-mini-http.") + voice_engine = "Play3.0-mini" + protocol = "http" + elif protocol in ["http", "ws"]: + logging.warning(f"No voice engine specified and protocol is {protocol}; using Play3.0-mini-{protocol}.") + voice_engine = "Play3.0-mini" + elif protocol == "grpc": + logging.warning("No voice engine specified and protocol is grpc; using PlayHT2.0-turbo.") + voice_engine = "PlayHT2.0-turbo" + else: + raise ValueError(f"No voice engine specified and invalid protocol {protocol} (must be http, ws, or grpc).") + elif voice_engine == "PlayHT2.0-turbo": - protocol = "grpc" - elif voice_engine == "Play3.0": - logging.warning("Voice engine Play3.0 is deprecated; use Play3.0-mini-http or Play3.0-mini-ws instead.") - logging.warning("No protocol specified; using HTTP (if not desired, append '-ws' to the voice engine)") - voice_engine = "Play3.0-mini" - protocol = "http" - elif voice_engine == "Play3.0-http": - logging.warning("Voice engine Play3.0-http is deprecated; use Play3.0-mini-http instead.") - voice_engine = "Play3.0-mini" - protocol = "http" - elif voice_engine == "Play3.0-ws": - logging.warning("Voice engine Play3.0-ws is deprecated; use Play3.0-mini-ws instead.") - voice_engine = "Play3.0-mini" - protocol = "ws" - elif voice_engine == "Play3.0-mini" or voice_engine == "PlayDialog" or voice_engine == "PlayDialogMultilingual": - logging.warning("No protocol specified; using HTTP (if not desired, append '-ws' to the voice engine)") - protocol = "http" + if not protocol: + protocol = "grpc" + if protocol != "grpc": + raise ValueError(f"Voice engine PlayHT2.0-turbo does not support protocol {protocol} (must be grpc).") + + elif voice_engine in ["Play3.0-mini", "Play3.0-mini-http", "Play3.0-mini-ws", "Play3.0-mini-grpc", + "Play3.0", "Play3.0-http", "Play3.0-ws", "Play3.0-grpc"]: + if "mini" not in voice_engine: + logging.warning("Voice engine Play3.0 is deprecated; use Play3.0-mini.") + voice_engine = voice_engine.replace("Play3.0", "Play3.0-mini") + if voice_engine == "Play3.0-mini": + if not protocol: + logging.warning("No protocol specified; using http") + protocol = "http" + if protocol not in ["http", "ws", "grpc"]: + raise ValueError(f"Voice engine Play3.0-mini does not support protocol {protocol} \ + (must be http, ws, or grpc [grpc for on-prem customers only]).") + else: + voice_engine, protocol = _convert_deprecated_voice_engine(voice_engine, protocol) + + elif voice_engine in ["PlayDialog", "PlayDialog-http", "PlayDialog-ws", "PlayDialogMultilingual", + "PlayDialogMultilingual-http", "PlayDialogMultilingual-ws"]: + if voice_engine in ["PlayDialog", "PlayDialogMultilingual"]: + if not protocol: + logging.warning("No protocol specified; using http") + protocol = "http" + if protocol not in ["http", "ws"]: + raise ValueError(f"Voice engine {voice_engine} does not support protocol {protocol} \ + (must be http or ws).") + else: + voice_engine, protocol = _convert_deprecated_voice_engine(voice_engine, protocol) + else: - voice_engine, protocol = voice_engine.rsplit("-", 1) + raise ValueError(f"Invalid voice engine: {voice_engine} (must be Play3.0-mini, PlayDialog, \ + PlayDialogMultilingual, or PlayHT2.0-turbo).") return voice_engine, protocol + + +def main(): + assert get_voice_engine_and_protocol(None, "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("", "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol(None, "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("", "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol(None, None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("", None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol(None, "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("", "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini", "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini", "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-mini", "grpc") == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0-mini", None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini", "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini-http", "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini-http", None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini-http", "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-mini-ws", "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-mini-ws", None) == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-mini-ws", "") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-mini-grpc", "grpc") == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0-mini-grpc", None) == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0-mini-grpc", "") == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0", "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0", "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0", "grpc") == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0", None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0", "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-http", "http") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-http", None) == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-http", "") == ("Play3.0-mini", "http") + assert get_voice_engine_and_protocol("Play3.0-ws", "ws") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-ws", None) == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-ws", "") == ("Play3.0-mini", "ws") + assert get_voice_engine_and_protocol("Play3.0-grpc", "grpc") == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0-grpc", None) == ("Play3.0-mini", "grpc") + assert get_voice_engine_and_protocol("Play3.0-grpc", "") == ("Play3.0-mini", "grpc") + + assert get_voice_engine_and_protocol("PlayDialog", "http") == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog", "ws") == ("PlayDialog", "ws") + assert get_voice_engine_and_protocol("PlayDialog", None) == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog", "") == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog-http", "http") == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog-http", None) == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog-http", "") == ("PlayDialog", "http") + assert get_voice_engine_and_protocol("PlayDialog-ws", "ws") == ("PlayDialog", "ws") + assert get_voice_engine_and_protocol("PlayDialog-ws", None) == ("PlayDialog", "ws") + assert get_voice_engine_and_protocol("PlayDialog-ws", "") == ("PlayDialog", "ws") + + assert get_voice_engine_and_protocol("PlayDialogMultilingual", "http") == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual", "ws") == ("PlayDialogMultilingual", "ws") + assert get_voice_engine_and_protocol("PlayDialogMultilingual", None) == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual", "") == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-http", "http") == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-http", None) == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-http", "") == ("PlayDialogMultilingual", "http") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-ws", "ws") == ("PlayDialogMultilingual", "ws") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-ws", None) == ("PlayDialogMultilingual", "ws") + assert get_voice_engine_and_protocol("PlayDialogMultilingual-ws", "") == ("PlayDialogMultilingual", "ws") + + assert get_voice_engine_and_protocol(None, "grpc") == ("PlayHT2.0-turbo", "grpc") + assert get_voice_engine_and_protocol("", "grpc") == ("PlayHT2.0-turbo", "grpc") + assert get_voice_engine_and_protocol("PlayHT2.0-turbo", "grpc") == ("PlayHT2.0-turbo", "grpc") + assert get_voice_engine_and_protocol("PlayHT2.0-turbo", None) == ("PlayHT2.0-turbo", "grpc") + assert get_voice_engine_and_protocol("PlayHT2.0-turbo", "") == ("PlayHT2.0-turbo", "grpc") + + +if __name__ == "__main__": + main()