-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpodcast_script.py
158 lines (128 loc) · 5.88 KB
/
podcast_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from hugchat import hugchat
from hugchat.login import Login
import re
import os
import edge_tts
import xml.etree.ElementTree as ET
from pydub import AudioSegment
import asyncio
# ---------------------------------------------------------------------------
# HuggingChat session setup: authenticate, then build the chatbot client.
# ---------------------------------------------------------------------------
EMAIL = "your-email@domain.com"     # placeholder: your Hugging Face account
PASSWD = "your-password"            # placeholder: your Hugging Face password
cookie_path_dir = "./cookies/"      # NOTE: trailing slash (/) is required to avoid errors

auth = Login(EMAIL, PASSWD)
cookies = auth.login(cookie_dir_path=cookie_path_dir, save_cookies=True)
print("Logged in successfully to Hugging Face.")

# The chatbot client reuses the authenticated session cookies.
chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
print("ChatBot created.")

# ---------------------------------------------------------------------------
# Podcast configuration: speaker display names and dialogue language.
# ---------------------------------------------------------------------------
speaker1 = "Ava"       # Change the name here
speaker2 = "Andrew"    # Change the name here
lang = "English"       # Change the language here

# Map each speaker name to an edge-tts neural voice id.
voice_map = {
    "Ava": "en-US-AvaMultilingualNeural",        # Change the edge-tts voice here
    "Andrew": "en-US-AndrewMultilingualNeural",  # Change the edge-tts voice here
}
# Function to generate SSML markup for a conversation between two speakers
def generate_ssml_conversation(text, speaker1="Ava", speaker2="Andrew"):
    """Ask the chatbot for a two-person dialogue based on *text*, wrapped in SSML.

    Parameters:
        text (str): Source material the dialogue is generated from.
        speaker1 (str): Name of the first speaker; gets a 0.5s pause after lines.
        speaker2 (str): Name of the second speaker; gets a 0.3s pause after lines.

    Returns:
        str | None: An SSML document string, or None if the chat model
        reported it was overloaded.
    """
    print("Generating SSML conversation...")
    # Start a new conversation so earlier context cannot leak into the dialogue.
    chatbot.new_conversation(switch_to=True)
    # Instruct the chatbot to create a light-hearted dialogue from the text
    dialogue_prompt = (
        f"Create a light-hearted conversation between two people based on the following text: '{text}'. "
        f"The first person is {speaker1}, and the second person is {speaker2}. They should affirm each other "
        f"and include pauses, but do not include stage directions or actions like (smiling) or (pausing). "
        f"Let {speaker1} introduce the podcast and {speaker2} at the start. Text in {lang} and at least 10 turns of every speaker."
    )
    try:
        response = chatbot.chat(dialogue_prompt)
        dialogue_text = str(response)  # Convert response to string if necessary
    except hugchat.exceptions.ModelOverloadedError:
        print("Error: Model is overloaded, please try again later.")
        return None  # Return None to indicate failure
    # Remove any stage directions or descriptions in parentheses
    dialogue_text = re.sub(r'\([^)]*\)', '', dialogue_text).strip()

    def _xml_escape(s):
        # Escape XML special characters so the generated SSML stays parseable
        # by ET.parse later; '&' must be replaced first to avoid double-escaping.
        return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    # Emit one <voice> element per dialogue line, keyed on the "Name:" prefix.
    ssml_output = '<speak>\n'
    for line in dialogue_text.split("\n"):
        line = line.strip()
        for name, pause in ((speaker1, "0.5s"), (speaker2, "0.3s")):
            if line.startswith(name + ":"):
                # Drop only the leading "Name:" prefix (str.replace would also
                # strip later occurrences inside the sentence).
                utterance = _xml_escape(line[len(name) + 1:].strip())
                ssml_output += f'<voice name="{name}">\n'
                ssml_output += f' {utterance}\n'
                ssml_output += f' <break time="{pause}"/>\n'
                ssml_output += '</voice>\n'
                break
    ssml_output += '</speak>'
    print("SSML conversation generated successfully.")
    return ssml_output
# --- Driver: turn content.txt into an SSML dialogue script ------------------
print("Reading input content from 'content.txt'...")
with open("content.txt", "r") as src:
    source_text = src.read().strip()

ssml_conversation = generate_ssml_conversation(source_text)

# Abort early if the chatbot could not produce a dialogue.
if ssml_conversation is None:
    print("Exiting the script due to error in SSML generation.")
    exit(1)

# Persist the SSML so the synthesis stage below can parse it from disk.
print("Saving SSML output to 'SSML.txt'...")
with open("SSML.txt", "w") as dst:
    dst.write(ssml_conversation)
print("SSML output has been saved to SSML.txt.")
async def synthesize_text(text, voice_name, filename):
    """Use EdgeTTS to convert text to speech and save it as an MP3 file.

    Parameters:
        text (str): The utterance to synthesize.
        voice_name (str): Speaker name; must be a key of the module-level
            ``voice_map``.
        filename (str): Destination path for the generated MP3.

    Raises:
        ValueError: If *voice_name* has no entry in ``voice_map``.
    """
    edge_voice = voice_map.get(voice_name)  # Get the right voice
    if edge_voice is None:
        raise ValueError(f"Unknown voice name: {voice_name}")
    print(f"Generating audio for voice: {edge_voice}")  # Debug output
    print(f"Text: {text}")  # Debug output
    # rate="+15%" speeds speech up slightly for a livelier podcast pace.
    communicate = edge_tts.Communicate(text, voice=edge_voice, rate="+15%")
    await communicate.save(filename)
    # Bug fix: this message previously printed the literal text "(unknown)"
    # instead of the actual output file name.
    print(f"Audio saved to '{filename}'.")
def parse_ssml(file_path):
    """Read an SSML file and return (voice name, spoken text) pairs.

    Parameters:
        file_path (str): Path to an SSML document whose <speak> root holds
            <voice> children.

    Returns:
        list[tuple[str, str]]: One (voice_name, text) tuple per <voice>
        element, in document order.
    """
    print(f"Parsing SSML from '{file_path}'...")
    root = ET.parse(file_path).getroot()
    # itertext() also collects the tails of nested tags such as <break/>,
    # so the full utterance survives even with pauses embedded.
    segments = [
        (voice.attrib['name'], ''.join(voice.itertext()).strip())
        for voice in root
        if voice.tag == 'voice'
    ]
    print("SSML parsing completed.")
    return segments
async def main():
    """Synthesize every SSML segment, stitch the clips together, clean up."""
    # Parse the SSML output that the generation stage saved earlier.
    segments = parse_ssml("SSML.txt")
    print(f"Found {len(segments)} segments to synthesize.")

    # One temporary MP3 per dialogue segment, numbered from 1.
    segment_files = [f"output_segment_{n}.mp3" for n in range(1, len(segments) + 1)]

    # Phase 1: text-to-speech for each segment.
    for (voice_name, text), mp3_filename in zip(segments, segment_files):
        await synthesize_text(text, voice_name, mp3_filename)

    # Phase 2: concatenate the clips in document order.
    print("Combining audio segments...")
    combined = AudioSegment.empty()
    for mp3_filename in segment_files:
        combined += AudioSegment.from_mp3(mp3_filename)

    combined.export("final_output.mp3", format="mp3")
    print("Final audio file 'final_output.mp3' has been created.")

    # Phase 3: delete the temporary per-segment files.
    for mp3_filename in segment_files:
        os.remove(mp3_filename)


# Kick off the async pipeline.
asyncio.run(main())