-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathapp.py
169 lines (154 loc) · 6.98 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import streamlit as st
import requests
import json
from urllib.parse import unquote
import time
import logging
from dotenv import load_dotenv
import os
# Module-level setup. Statement order matters: the .env file is loaded before
# the credentials are read, and the Streamlit page is configured before any
# other Streamlit call in this file.

# Emit INFO-and-above records with a timestamp prefix.
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")
logger = logging.getLogger(__name__)
load_dotenv()
# Credentials and region for the speech service, read from the environment;
# os.getenv yields None for either one when the variable is not set.
SUBSCRIPTION_KEY = os.getenv('SUBSCRIPTION_KEY')
SERVICE_REGION = os.getenv('SERVICE_REGION')
# NOTE(review): not referenced by the request helpers visible in this file,
# which spell the host out inside their URLs.
SERVICE_HOST="customvoice.api.speech.microsoft.com"
# NOTE(review): the page_icon text looks like a mis-decoded emoji — confirm
# against the file's original encoding.
st.set_page_config(page_title="Talking Avatar", page_icon="π£οΈ",initial_sidebar_state="auto",layout='centered')
# Sent as displayName / description with every synthesis job.
NAME = "Text-to-Speech"
DESCRIPTION = "Using Azure AI Services"
# One row per supported language: (display name, locale code, voice name).
# Row order is significant — the language selector in main() picks its default
# entry by position.
_LANGUAGE_ROWS = (
    ("Arabic", "ar-SA", "ar-SA-ZariyahNeural"),
    ("Bahasa Indonesian", "id-ID", "id-ID-GadisNeural"),
    ("Bengali", "bn-IN", "bn-IN-TanishaaNeural"),
    ("Chinese Mandarin", "zh-CN", "zh-CN-XiaoxiaoNeural"),
    ("Dutch", "nl-NL", "nl-NL-FennaNeural"),
    ("English", "en-US", "en-US-AvaNeural"),
    ("French", "fr-FR", "fr-FR-DeniseNeural"),
    ("German", "de-DE", "de-DE-KatjaNeural"),
    ("Hindi", "hi-IN", "hi-IN-SwaraNeural"),
    ("Italian", "it-IT", "it-IT-ElsaNeural"),
    ("Japanese", "ja-JP", "ja-JP-NanamiNeural"),
    ("Korean", "ko-KR", "ko-KR-SunHiNeural"),
    ("Russian", "ru-RU", "ru-RU-SvetlanaNeural"),
    ("Spanish", "es-ES", "es-ES-ElviraNeural"),
    ("Telugu", "te-IN", "te-IN-ShrutiNeural"),
)

# Display name -> [locale code, voice name]; index 1 is the voice used for synthesis.
lang_voices = {
    display_name: [locale, voice_name]
    for display_name, locale, voice_name in _LANGUAGE_ROWS
}
# Sidebar content: project links and credits, rendered top to bottom.
_SIDEBAR_ENTRIES = (
    "[Source Code](https://github.com/Sgvkamalakar/Azure-Talking-Avatar)",
    "[Explore my Codes](https://github.com/sgvkamalakar)",
    "[Connect with me on LinkedIn](https://www.linkedin.com/in/sgvkamlakar)",
    "Learn more about Text-to-Speech Avatar on Microsoft Azure [here](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech-avatar/what-is-text-to-speech-avatar)",
    "Developed with π by Kamalakar",
)

with st.sidebar:
    for _sidebar_entry in _SIDEBAR_ENTRIES:
        st.markdown(_sidebar_entry)
def submit_synthesis(text, voice, style):
    """Submit a batch talking-avatar synthesis job.

    Args:
        text: Plain text for the job's single input.
        voice: Voice name placed in the job's ``synthesisConfig``.
        style: Value sent as ``talkingAvatarStyle``.

    Returns:
        The job id reported by the service, or ``None`` when the request could
        not be delivered or the service answered with an HTTP status >= 400.
        Both failure kinds are logged rather than raised.
    """
    url = (
        f'https://{SERVICE_REGION}.customvoice.api.speech.microsoft.com'
        '/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar'
    )
    headers = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY,
        'Content-Type': 'application/json',
    }
    payload = {
        'displayName': NAME,
        'description': DESCRIPTION,
        "textType": "PlainText",
        'synthesisConfig': {
            "voice": voice,
        },
        'customVoices': {},
        "inputs": [
            {
                "text": text,
            },
        ],
        "properties": {
            "customized": False,
            "talkingAvatarCharacter": "lisa",
            "talkingAvatarStyle": style,
            "videoFormat": "webm",
            "videoCodec": "vp9",
            "subtitleType": "soft_embedded",
            "backgroundColor": "transparent",
        },
    }

    try:
        # Without a timeout an unreachable endpoint would block the page forever.
        response = requests.post(url, json.dumps(payload), headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Transport failures take the same "log and return None" path as HTTP errors.
        logger.error(f'Failed to submit batch avatar synthesis job: {exc}')
        return None

    if response.status_code >= 400:
        logger.error(f'Failed to submit batch avatar synthesis job: {response.text}')
        return None

    logger.info('Batch avatar synthesis job submitted successfully')
    job_id = response.json()["id"]  # parse the body once
    logger.info(f'Job ID: {job_id}')
    return job_id
def get_content_from_url(decoded_url):
    """Fetch ``decoded_url`` and hand back the response body.

    Returns the response content when the server answers 200. For any other
    status, or when anything raises during the fetch, a message string that
    starts with ``"Error:"`` is returned instead of an exception being raised.
    """
    try:
        reply = requests.get(decoded_url)
        if reply.status_code != 200:
            return f"Error: Unable to retrieve content from URL. Status code: {reply.status_code}"
        return reply.content
    except Exception as exc:  # broad on purpose: callers expect a value back
        return f"Error: {exc!s}"
def get_synthesis(job_id):
    """Check one batch avatar synthesis job and render its video once finished.

    Args:
        job_id: Job identifier, as returned by ``submit_synthesis``.

    Returns:
        ``1`` when the job succeeded and the video was rendered on the page.
        ``0`` when the job cannot succeed any more: the service reported
        ``Failed``, or the finished video could not be downloaded.
        ``None`` when the job has not finished yet or the status request got
        an HTTP error — the caller may poll again.
    """
    url = (
        f'https://{SERVICE_REGION}.customvoice.api.speech.microsoft.com'
        f'/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar/{job_id}'
    )
    headers = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }
    # Bounded wait so a stalled connection cannot freeze the polling loop.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code >= 400:
        logger.error(f'Failed to get batch synthesis job: {response.text}')
        return None

    logger.debug('Get batch synthesis job successfully')
    job = response.json()  # parse the body once and reuse it
    logger.debug(job)
    status = job['status']

    if status == 'Failed':
        # Terminal state: report it so the caller stops polling instead of
        # waiting for a success that will never come.
        logger.error(f'Batch synthesis job failed: {response.text}')
        return 0
    if status != 'Succeeded':
        return None

    video_url = job["outputs"]["result"]
    logger.info(f'Batch synthesis job succeeded. Download URL: {video_url}')
    decoded_url = unquote(video_url)
    content = get_content_from_url(decoded_url)
    # get_content_from_url returns an "Error: ..." string on failure. Such
    # messages can be longer than 100 characters, so check the type as well as
    # the size rather than relying on the size alone.
    if isinstance(content, str) or len(content) < 100:
        st.error("An error occurred while processing the request. Please try again later π’")
        return 0

    st.markdown(f"You can download the synthesized avatar video [here]({decoded_url}).")
    st.video(decoded_url)
    return 1
def list_synthesis_jobs(skip: int = 0, top: int = 100):
    """Log one page of batch avatar synthesis jobs.

    ``skip`` and ``top`` are passed through as the query parameters of the
    same names. The outcome is written to the module logger; nothing is
    returned.
    """
    endpoint = f'https://{SERVICE_REGION}.customvoice.api.speech.microsoft.com/api/texttospeech/3.1-preview1/batchsynthesis/talkingavatar?skip={skip}&top={top}'
    reply = requests.get(endpoint, headers={'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY})

    if reply.status_code >= 400:
        logger.error(f'Failed to list batch synthesis jobs: {reply.text}')
        return

    listing = reply.json()
    logger.info(f'List batch synthesis jobs successfully, got {len(listing["values"])} jobs')
    logger.info(listing)
# How often main() re-checks a submitted job, and the longest it keeps waiting.
_POLL_INTERVAL_SECONDS = 5
_MAX_WAIT_SECONDS = 900


def main():
    """Render the page and, on submit, run one avatar synthesis job to completion.

    The page shows language and avatar-style selectors, a text box, the
    service-status notices and a sample video. When the submit button is
    pressed with non-blank text, a job is submitted and polled until it
    succeeds, fails, or the waiting budget is used up; every outcome is
    reported on the page.
    """
    st.title("Azure Text-to-Talking Avatar")
    col1, col2 = st.columns(2)
    with col1:
        # index=5 selects 'English', the sixth entry of lang_voices.
        lang = st.selectbox('Choose the language', list(lang_voices), index=5)
    with col2:
        style = st.selectbox(
            'Avatar Style',
            ["Casual-Sitting", "Graceful-Sitting", "Technical-Sitting", "Graceful-Standing", "Technical-Standing"],
            index=1,
        )
    style = style.lower()
    voice = lang_voices[lang][1]
    text_input = st.text_area(f'Type text in {lang}')
    submit_button = st.button("Submit Job")
    st.error("The resource group associated with this project has been deactivated, resulting in the current non-functionality of the app π€§... I apologize for any inconvenience caused π...")
    st.info("If you require further assistance or have any questions, feel free to reach out to me at sgvkamalakar@gmail.com")
    st.subheader("Sample Video")
    st.video("demo/demo.webm")

    if not submit_button:
        return
    if text_input.strip() == '':
        st.info("Give me something to work with! How about a dazzling sentence? π")
        return

    with st.spinner("Processing..."):
        job_id = submit_synthesis(text_input, voice, style)
        if job_id is None:
            # Previously a rejected submission left the page without any feedback.
            st.error('Could not submit the avatar synthesis job. Please try again later.')
            return

        # Bound the wait: get_synthesis returns None both while the job is
        # running and when the status request fails, so an unbounded loop
        # could spin forever.
        deadline = time.monotonic() + _MAX_WAIT_SECONDS
        while True:
            status = get_synthesis(job_id)
            if status == 1:
                st.success('Batch avatar synthesis job succeeded ✅')
                return
            if status == 0:
                st.error('Uh-oh! The avatar synthesis job took an unexpected turn. β')
                return
            if time.monotonic() >= deadline:
                st.error('The avatar synthesis job did not finish in time. Please try again later.')
                return
            time.sleep(_POLL_INTERVAL_SECONDS)


if __name__ == '__main__':
    main()