Skip to content

Commit

Permalink
feat: [google-cloud-texttospeech] StreamingSynthesize now supports opus (#13406)
Browse files Browse the repository at this point in the history

- [ ] Regenerate this pull request now.

PiperOrigin-RevId: 712905945

Source-Link:
googleapis/googleapis@bd72915

Source-Link:
googleapis/googleapis-gen@fc8216c
Copy-Tag:
eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiZmM4MjE2Y2ZkOGEzMjcxM2Y1MjAyZmViMzJmMTliOTI3YTEzOTljNCJ9

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Victor Chudnovsky <vchudnov@google.com>
  • Loading branch information
3 people authored Jan 8, 2025
1 parent 2c1e359 commit 9f278d6
Show file tree
Hide file tree
Showing 17 changed files with 58 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
ListVoicesResponse,
MultiSpeakerMarkup,
SsmlVoiceGender,
StreamingAudioConfig,
StreamingSynthesisInput,
StreamingSynthesizeConfig,
StreamingSynthesizeRequest,
Expand Down Expand Up @@ -71,6 +72,7 @@
"ListVoicesRequest",
"ListVoicesResponse",
"MultiSpeakerMarkup",
"StreamingAudioConfig",
"StreamingSynthesisInput",
"StreamingSynthesizeConfig",
"StreamingSynthesizeRequest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
__version__ = "2.23.0" # {x-release-please-version}
__version__ = "0.0.0" # {x-release-please-version}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
ListVoicesResponse,
MultiSpeakerMarkup,
SsmlVoiceGender,
StreamingAudioConfig,
StreamingSynthesisInput,
StreamingSynthesizeConfig,
StreamingSynthesizeRequest,
Expand Down Expand Up @@ -64,6 +65,7 @@
"ListVoicesResponse",
"MultiSpeakerMarkup",
"SsmlVoiceGender",
"StreamingAudioConfig",
"StreamingSynthesisInput",
"StreamingSynthesizeConfig",
"StreamingSynthesizeRequest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
__version__ = "2.23.0" # {x-release-please-version}
__version__ = "0.0.0" # {x-release-please-version}
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ async def sample_synthesize_speech():
voice.language_code = "language_code_value"
audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"
request = texttospeech_v1.SynthesizeSpeechRequest(
input=input,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def sample_synthesize_speech():
voice.language_code = "language_code_value"
audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"
request = texttospeech_v1.SynthesizeSpeechRequest(
input=input,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ async def sample_synthesize_long_audio():
input.text = "text_value"
audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"
voice = texttospeech_v1.VoiceSelectionParams()
voice.language_code = "language_code_value"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def sample_synthesize_long_audio():
input.text = "text_value"
audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"
voice = texttospeech_v1.VoiceSelectionParams()
voice.language_code = "language_code_value"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ListVoicesResponse,
MultiSpeakerMarkup,
SsmlVoiceGender,
StreamingAudioConfig,
StreamingSynthesisInput,
StreamingSynthesizeConfig,
StreamingSynthesizeRequest,
Expand All @@ -50,6 +51,7 @@
"ListVoicesRequest",
"ListVoicesResponse",
"MultiSpeakerMarkup",
"StreamingAudioConfig",
"StreamingSynthesisInput",
"StreamingSynthesizeConfig",
"StreamingSynthesizeRequest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"CustomVoiceParams",
"VoiceCloneParams",
"SynthesizeSpeechResponse",
"StreamingAudioConfig",
"StreamingSynthesizeConfig",
"StreamingSynthesisInput",
"StreamingSynthesizeRequest",
Expand Down Expand Up @@ -103,13 +104,19 @@ class AudioEncoding(proto.Enum):
8-bit samples that compand 14-bit audio
samples using G.711 PCMU/A-law. Audio content
returned as ALAW also contains a WAV header.
PCM (7):
Uncompressed 16-bit signed little-endian
samples (Linear PCM). Note that as opposed to
LINEAR16, audio will not be wrapped in a WAV (or
any other) header.
"""
AUDIO_ENCODING_UNSPECIFIED = 0
LINEAR16 = 1
MP3 = 2
OGG_OPUS = 3
MULAW = 5
ALAW = 6
PCM = 7


class ListVoicesRequest(proto.Message):
Expand Down Expand Up @@ -657,6 +664,30 @@ class SynthesizeSpeechResponse(proto.Message):
)


class StreamingAudioConfig(proto.Message):
    r"""Description of the desired output audio data.

    Attributes:
        audio_encoding (google.cloud.texttospeech_v1.types.AudioEncoding):
            Required. The format of the audio byte stream. For now,
            streaming only supports PCM and OGG_OPUS. All other
            encodings will return an error.
        sample_rate_hertz (int):
            Optional. The synthesis sample rate (in
            hertz) for this audio.
    """

    # Proto field 1: output encoding; the enum is referenced by its
    # string name ("AudioEncoding").
    audio_encoding: "AudioEncoding" = proto.Field(
        proto.ENUM,
        number=1,
        enum="AudioEncoding",
    )
    # Proto field 2: requested synthesis sample rate in hertz (int32).
    sample_rate_hertz: int = proto.Field(
        proto.INT32,
        number=2,
    )


class StreamingSynthesizeConfig(proto.Message):
r"""Provides configuration information for the
StreamingSynthesize request.
Expand All @@ -665,13 +696,21 @@ class StreamingSynthesizeConfig(proto.Message):
voice (google.cloud.texttospeech_v1.types.VoiceSelectionParams):
Required. The desired voice of the
synthesized audio.
streaming_audio_config (google.cloud.texttospeech_v1.types.StreamingAudioConfig):
Optional. The configuration of the
synthesized audio.
"""

voice: "VoiceSelectionParams" = proto.Field(
proto.MESSAGE,
number=1,
message="VoiceSelectionParams",
)
streaming_audio_config: "StreamingAudioConfig" = proto.Field(
proto.MESSAGE,
number=4,
message="StreamingAudioConfig",
)


class StreamingSynthesisInput(proto.Message):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
__version__ = "2.23.0" # {x-release-please-version}
__version__ = "0.0.0" # {x-release-please-version}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-texttospeech",
"version": "2.23.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-texttospeech",
"version": "2.23.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def sample_synthesize_long_audio():
input.text = "text_value"

audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"

voice = texttospeech_v1.VoiceSelectionParams()
voice.language_code = "language_code_value"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def sample_synthesize_long_audio():
input.text = "text_value"

audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"

voice = texttospeech_v1.VoiceSelectionParams()
voice.language_code = "language_code_value"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ async def sample_synthesize_speech():
voice.language_code = "language_code_value"

audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"

request = texttospeech_v1.SynthesizeSpeechRequest(
input=input,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def sample_synthesize_speech():
voice.language_code = "language_code_value"

audio_config = texttospeech_v1.AudioConfig()
audio_config.audio_encoding = "ALAW"
audio_config.audio_encoding = "PCM"

request = texttospeech_v1.SynthesizeSpeechRequest(
input=input,
Expand Down

0 comments on commit 9f278d6

Please sign in to comment.