-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
198 lines (173 loc) · 7.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python
# coding: utf-8
import json
import logging
import os
import sys
import time
import requests
import streamlit as st
# Logging setup: timestamped records at INFO level and above go to stdout.
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
    datefmt="%m/%d/%Y %I:%M:%S %p %Z",
    format="[%(asctime)s] %(message)s",
)
# Module-level logger used by the request helpers in this file.
logger = logging.getLogger(__name__)
# Page configuration: title, centered layout, sidebar expanded on load
st.set_page_config(
    page_title="Text to Avatar Video Synthesis",
    layout="centered",  # Centered layout (not wide)
    initial_sidebar_state="expanded"  # Expanded sidebar
)
# Avatar image for the main page and logo for the sidebar
image_path = os.path.join(os.path.dirname(__file__), 'assets/avatar.png')  # avatar.png is read from the assets/ folder next to this script
logo_path = os.path.join(os.path.dirname(__file__), 'assets/logo.png')  # logo.png is read from the same assets/ folder
image_url = st.image(image_path, width=400)  # NOTE(review): holds whatever st.image returns, presumably not a URL despite the name - confirm before using it
st.sidebar.image(logo_path, caption='Avatar Image', width=100)  # NOTE(review): the caption says 'Avatar Image' but the file shown is the logo - confirm intended
# Raw CSS injected into the page. Both rules give a padded, rounded box with a
# soft shadow; `.st-ei` is the class used by the sidebar information <div>.
_PAGE_CSS = """
<style>
.st-bf {
background-color: #f0f0f0; /* Light gray background for main content */
padding: 10px;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); /* Soft shadow */
}
.st-ei {
background-color: #ffffff; /* White background for sidebar */
padding: 10px;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); /* Soft shadow */
}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as markup.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Sidebar information pane: setup instructions rendered as one HTML box
# (class `st-ei`) with horizontal rules between the steps.
_SIDEBAR_INFO_HTML = """
<div class="st-ei">
<p>In order to use this service, you must have an active Azure Service. Create a service group (minimum plan is Standard. Free subscription is ok, but need to select standard - pay as you go).</p>
<hr style="border-top: 1px solid #ddd;"> <!-- Horizontal line -->
<p>Then only the speech service of Azure shows the dashboard with the "Text to Speech with Avatar" option.</p>
<hr style="border-top: 1px solid #ddd;"> <!-- Horizontal line -->
<p>Copy the key, and your region (Region must be any of the 3 - West US 2, West Europe, and Southeast Asia).</p>
<hr style="border-top: 1px solid #ddd;"> <!-- Horizontal line -->
<p>Then play with our platform.</p>
<hr style="border-top: 1px solid #ddd;"> <!-- Horizontal line -->
<p>Refer Azure Example: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/yulin/batch-avatar/samples/batch-avatar/python/synthesis.py</p>
</div>
"""
st.sidebar.title("Information")
st.sidebar.markdown(_SIDEBAR_INFO_HTML, unsafe_allow_html=True)
# Fixed metadata attached to every synthesis job.
NAME = "Simple avatar synthesis"
DESCRIPTION = "Simple avatar synthesis description"
# Host suffix; the selected region is prepended to it to form the endpoint.
SERVICE_HOST = "customvoice.api.speech.microsoft.com"
# Regions offered in the selector below.
regions = ["westus2", "westeurope", "southeastasia"]
# User inputs: the subscription key (masked field) followed by the region.
SUBSCRIPTION_KEY = st.text_input(
    "Enter your Speech API subscription key:",
    type="password",
)
SERVICE_REGION = st.selectbox(
    "Select your Speech API service region:",
    regions,
)
def submit_synthesis(text, *, timeout=30.0):
    """Submit a talking-avatar batch synthesis job for ``text``.

    Reads the module-level ``SERVICE_REGION``, ``SERVICE_HOST`` and
    ``SUBSCRIPTION_KEY`` values. Every failure is logged and shown to the
    user through ``st.error`` instead of being raised.

    Args:
        text: Plain text to synthesize into an avatar video.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The ``id`` field of the service response on success, otherwise
        ``None`` (missing input, network error, HTTP error, or a response
        without an ``id``).
    """
    if not (SERVICE_REGION and SUBSCRIPTION_KEY and text):
        st.error('Please fill in all required fields.')
        return None

    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY,
        'Content-Type': 'application/json'
    }
    payload = {
        'displayName': NAME,
        'description': DESCRIPTION,
        "textType": "PlainText",
        'synthesisConfig': {
            "voice": "en-US-JennyNeural",
        },
        'inputs': [
            {
                "text": text,
            },
        ],
        "properties": {
            "customized": False,
            "talkingAvatarCharacter": "lisa",
            "talkingAvatarStyle": "graceful-sitting",
            "videoFormat": "webm",
            "videoCodec": "vp9",
            "subtitleType": "soft_embedded",
            "backgroundColor": "transparent",
        }
    }

    try:
        # Without a timeout an unresponsive service would block the app forever.
        response = requests.post(url, json.dumps(payload), headers=header, timeout=timeout)
    except requests.RequestException as exc:
        logger.error(f'Failed to submit batch avatar synthesis job: {exc}')
        st.error(f'Failed to submit batch avatar synthesis job: {exc} Check SUBSCRIPTION_KEY or REGION')
        return None

    if response.status_code >= 400:
        logger.error(f'Failed to submit batch avatar synthesis job: {response.text}')
        st.error(f'Failed to submit batch avatar synthesis job: {response.text} Check SUBSCRIPTION_KEY or REGION')
        return None

    try:
        # Parse the body once; a non-JSON body or a missing "id" is reported
        # rather than surfacing as an unhandled exception.
        job_id = response.json()["id"]
    except (ValueError, KeyError, TypeError):
        logger.error(f'Unexpected response to batch avatar synthesis submission: {response.text}')
        st.error(f'Unexpected response to batch avatar synthesis submission: {response.text}')
        return None

    logger.info('Batch avatar synthesis job submitted successfully')
    logger.info(f'Job ID: {job_id}')
    return job_id
def get_synthesis(job_id, *, timeout=30.0):
    """Fetch the current state of a batch synthesis job.

    Args:
        job_id: Identifier returned when the job was submitted.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The download URL (``outputs.result``) when the job status is
        ``'Succeeded'``; otherwise the status string reported by the service.
        ``None`` when the request fails or the response cannot be
        interpreted (the problem is logged and shown via ``st.error``).
    """
    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar/{job_id}'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }

    try:
        # Without a timeout an unresponsive service would block the app forever.
        response = requests.get(url, headers=header, timeout=timeout)
    except requests.RequestException as exc:
        logger.error(f'Failed to get batch synthesis job: {exc}')
        st.error(f'Failed to get batch synthesis job: {exc}')
        return None

    if response.status_code >= 400:
        logger.error(f'Failed to get batch synthesis job: {response.text}')
        st.error(f'Failed to get batch synthesis job: {response.text}')
        return None

    try:
        # Parse the body once and pull out everything needed from it.
        job = response.json()
        status = job['status']
        download_url = job["outputs"]["result"] if status == 'Succeeded' else None
    except (ValueError, KeyError, TypeError):
        logger.error(f'Unexpected batch synthesis job response: {response.text}')
        st.error(f'Unexpected batch synthesis job response: {response.text}')
        return None

    logger.debug('Get batch synthesis job successfully')
    logger.debug(job)
    if status == 'Succeeded':
        logger.info(f'Batch synthesis job succeeded, download URL: {download_url}')
        return download_url
    return status
def list_synthesis_jobs(skip: int = 0, top: int = 100, *, timeout: float = 30.0):
    """List batch synthesis jobs in the subscription.

    Args:
        skip: Number of jobs to skip (sent as the ``skip`` query parameter).
        top: Maximum number of jobs to request (sent as ``top``).
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The list stored under the ``values`` key of the response, or an empty
        list when credentials are missing, the request fails, or the response
        cannot be interpreted.
    """
    if not (SERVICE_REGION and SUBSCRIPTION_KEY):
        # No key or region entered yet: do not send a request without
        # credentials, just tell the user what is needed.
        st.info('Enter your subscription key and region to list existing jobs.')
        return []

    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }

    try:
        # `params` builds the same ?skip=..&top=.. query string with proper
        # encoding; the timeout keeps an unresponsive service from hanging the app.
        response = requests.get(url, headers=header, params={'skip': skip, 'top': top}, timeout=timeout)
    except requests.RequestException as exc:
        logger.error(f'Failed to list batch synthesis jobs: {exc}')
        st.error(f'Failed to list batch synthesis jobs: {exc}')
        return []

    if response.status_code >= 400:
        # A failed request is not the same as "no jobs exist"; report the
        # actual service error so bad keys or regions are visible.
        logger.error(f'Failed to list batch synthesis jobs: {response.text}')
        st.error(f'Failed to list batch synthesis jobs: {response.text}')
        return []

    try:
        # Parse the body once; a non-JSON body or missing "values" is reported.
        body = response.json()
        jobs = body['values']
    except (ValueError, KeyError, TypeError):
        logger.error(f'Unexpected response when listing batch synthesis jobs: {response.text}')
        st.error(f'Unexpected response when listing batch synthesis jobs: {response.text}')
        return []

    logger.info(f'List batch synthesis jobs successfully, got {len(jobs)} jobs')
    logger.info(body)
    return jobs
def _wait_for_job(job_id, poll_seconds=5):
    """Poll ``get_synthesis`` until the job finishes, reporting progress in the UI.

    ``get_synthesis`` returns the download URL once the job has succeeded, a
    status string while it has not, and ``None`` when the status request
    itself failed. Each of those outcomes is handled explicitly so the loop
    ends as soon as the job is finished or its status can no longer be read.
    """
    while True:
        outcome = get_synthesis(job_id)
        if outcome is None:
            # The request failure was already reported by get_synthesis;
            # stop instead of retrying forever.
            st.error("Could not retrieve the job status; stopped waiting.")
            return
        if outcome == 'Failed':
            st.error("Job failed.")
            return
        if '://' in outcome:
            # A URL (rather than a bare status word) means the job succeeded.
            st.success(f"Job succeeded! Download your video [here]({outcome})")
            return
        st.info(f"Job status: {outcome}. Checking again in {poll_seconds} seconds...")
        time.sleep(poll_seconds)


def main():
    """Render the page: text form, job submission with progress, and job list."""
    st.title("Text to Avatar Video Synthesis")
    text = st.text_area("Enter the text to be synthesized into avatar video:", "Hi, I'm a virtual assistant created by Microsoft.")

    if st.button("Submit"):
        job_id = submit_synthesis(text)
        if job_id:
            st.success(f"Job submitted successfully. Job ID: {job_id}")
            st.info("Waiting for job completion...")
            _wait_for_job(job_id)

    # NOTE(review): this section is assumed to run on every page load rather
    # than only after Submit - confirm the intended nesting.
    st.header("List Batch Synthesis Jobs")
    jobs = list_synthesis_jobs()
    if jobs:
        st.write(f"Total jobs: {len(jobs)}")
        st.write("Job details:")
        for job in jobs:
            st.write(job)


if __name__ == '__main__':
    main()