diff --git a/.env.example b/.env.example index 047e2d7..caf7fd2 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,7 @@ OPENAI_API_KEY=your_api_key LANGSMITH_API_KEY=your_langsmith_api_key #Find it here: https://smith.langchain.com +PORT=3000 +NARAKEET_API_KEY=your_narakeet_api_key #FLASK_ENV=development #Optional if you want docker to reload flask when you save your code. #LANGSMITH_API_KEY=your_api_key #optional. Let's you debug using langsmith #LANGCHAIN_PROJECT=your_project_name #pops up in langsmith dashboard @@ -7,4 +9,4 @@ LANGSMITH_API_KEY=your_langsmith_api_key #Find it here: https://smith.langchain. #NARAKEET_API_KEY=your_api_key #Needed for the AI to generate videos. The tool will be disabled if there's no api key #PERPLEXITY_API_KEY=your_api_key #Needed for the AI to generate videos. The tool will be disabled if there's no api key #GOOGLE_AUTH_KEY=your_google_auth_key #Needed for the AI to access your google calendar. The tool will be disabled if there's no api key -#GOOGLE_CALENDAR_ID=your_google_calendar_id #Needed for the AI to access your google calendar. The tool will be disabled if there's no api key \ No newline at end of file +#GOOGLE_CALENDAR_ID=your_google_calendar_id #Needed for the AI to access your google calendar. 
The tool will be disabled if there's no api key diff --git a/.gitignore b/.gitignore index e0890d0..81cae66 100644 --- a/.gitignore +++ b/.gitignore @@ -150,3 +150,7 @@ dmypy.json *.webm #calender json /core/tools/calendarjson + + +#Redis data +redis_data diff --git a/core/static/index.html b/core/static/index.html index b2e43a0..656951c 100644 --- a/core/static/index.html +++ b/core/static/index.html @@ -12,6 +12,7 @@ + diff --git a/core/static/tts.js b/core/static/tts.js new file mode 100644 index 0000000..9c73df6 --- /dev/null +++ b/core/static/tts.js @@ -0,0 +1,187 @@ +// TODO: Remove random debugging stuff +// Check if socket already exists, if not create it +const ttsSocket = (() => { + const config = { + websocketServer: 'http://localhost:5000' + }; + + return io(config.websocketServer, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionAttempts: 5, + reconnectionDelay: 1000, + timeout: 10000, + autoConnect: true, + }); +})(); + +console.log('Attempting to connect to server...'); + +let audioContext; +let isProcessing = false; +let audioQueue = []; +let expectedSentenceId = 1; + +// Update all socket references to ttsSocket +ttsSocket.onAny((eventName, ...args) => { + console.log(`Received event: ${eventName}`, args); +}); + +ttsSocket.on('connecting', () => { + console.log('Attempting to connect...'); +}); + +ttsSocket.on('connect', () => { + console.log('Connected to remote WebSocket server:', ttsSocket.io.uri); + console.log('Connected to server with ID:', ttsSocket.id); + console.log('Transport type:', ttsSocket.io.engine.transport.name); +}); + +ttsSocket.on('connect_error', (error) => { + console.error('Connection error:', error); + console.log('Failed connecting to:', ttsSocket.io.uri); + console.log('Transport type:', ttsSocket.io.engine.transport.name); +}); + +ttsSocket.on('connect_timeout', () => { + console.error('Connection timeout'); +}); + +ttsSocket.on('reconnect_attempt', (attemptNumber) => { + 
console.log(`Reconnection attempt ${attemptNumber}`); +}); + +ttsSocket.on('disconnect', () => { + console.log('Disconnected from server'); +}); + +async function initAudioContext() { + audioContext = new (window.AudioContext || window.webkitAudioContext)(); +} + +async function processAudioChunk(audioData, sentenceId) { + try { + console.log(`Processing audio chunk for sentence ${sentenceId}`); + const arrayBuffer = new Uint8Array(audioData).buffer; + const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); + + const source = audioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(audioContext.destination); + + // Add an event listener for when the audio finishes playing + source.onended = () => { + console.log(`Finished playing sentence ${sentenceId}`); + expectedSentenceId++; + isProcessing = false; + processQueuedAudio(); // Process next chunk if available + }; + + source.start(); + isProcessing = true; + + } catch (error) { + console.error('Error processing audio chunk:', error); + isProcessing = false; + processQueuedAudio(); // Try next chunk on error + } +} + +function processQueuedAudio() { + if (isProcessing || audioQueue.length === 0) return; + + // Sort queue by sentence ID + audioQueue.sort((a, b) => a.sentenceId - b.sentenceId); + + // Process next chunk if it matches expected ID + const nextChunk = audioQueue[0]; + if (nextChunk.sentenceId === expectedSentenceId) { + audioQueue.shift(); // Remove from queue + processAudioChunk(nextChunk.audioData, nextChunk.sentenceId); + } +} + +// Socket.IO event handler +ttsSocket.on('audio_stream', async (data) => { + console.log('Received audio_stream event:', { + sentenceId: data.sentence_id, + dataLength: data.audio_data.length + }); + + if (!audioContext) { + console.log('Initializing audio context'); + await initAudioContext(); + } + + const audioData = new Uint8Array(data.audio_data); + const sentenceId = data.sentence_id; + + // Reset state if this is the start of a 
new generation + if (sentenceId === 1) { + console.log('New text generation - resetting client state'); + expectedSentenceId = 1; + audioQueue = []; + isProcessing = false; + } + + console.log(`Queueing audio chunk ${sentenceId}`); + + // Queue the audio chunk + audioQueue.push({ + audioData: audioData, + sentenceId: sentenceId, + timestamp: Date.now() + }); + + console.log(`Current queue length: ${audioQueue.length}`); + // Try to process queued audio + processQueuedAudio(); +}); + +// Initialize audio context on user interaction +document.addEventListener('click', async () => { + if (!audioContext) { + await initAudioContext(); + } + if (audioContext.state === 'suspended') { + await audioContext.resume(); + } +}); + +ttsSocket.on('test', (data) => { + console.log('Received test message:', data); +}); + +ttsSocket.on('connect', () => { + console.log('Connected to remote WebSocket server:', ttsSocket.io.uri); + console.log('Connected to server with ID:', ttsSocket.id); + console.log('Transport type:', ttsSocket.io.engine.transport.name); +}); + +ttsSocket.on('connect_error', (error) => { + console.error('Connection error:', error); + console.log('Failed connecting to:', ttsSocket.io.uri); + console.log('Transport type:', ttsSocket.io.engine.transport.name); +}); + +ttsSocket.on('connect_timeout', () => { + console.error('Connection timeout'); +}); + +ttsSocket.on('reconnect_attempt', (attemptNumber) => { + console.log(`Reconnection attempt ${attemptNumber}`); +}); + +ttsSocket.on('disconnect', () => { + console.log('Disconnected from server'); +}); + +ttsSocket.on('connect', () => { + console.log('Connected to remote WebSocket server:', ttsSocket.io.uri); + console.log('Transport type:', ttsSocket.io.engine.transport.name); +}); + +ttsSocket.on('connect_error', (error) => { + console.error('Connection error:', error); + console.log('Failed connecting to:', ttsSocket.io.uri); +}); \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 
f705a3f..3e4ba0d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,6 +44,38 @@ services: stop_signal: SIGINT ports: - "3001:3001" + redis: + image: redis:latest + container_name: redis_audio + ports: + - "6379:6379" + command: > + redis-server + --appendonly yes + --save 60 1 + --save 300 100 + --save 900 1000 + --maxmemory 1000mb + volumes: + - ./redis_data:/data + tts: + build: + context: ./textToSpeech + dockerfile: Dockerfile + env_file: + - .env + ports: + - "5000:5000" + environment: + - REDIS_URL=redis://redis:6379 + - NARAKEET_API_KEY=${NARAKEET_API_KEY} + depends_on: + - redis + + + + + networks: backend: diff --git a/textToSpeech/Dockerfile b/textToSpeech/Dockerfile index bf89f0f..a590a13 100644 --- a/textToSpeech/Dockerfile +++ b/textToSpeech/Dockerfile @@ -1,25 +1,8 @@ -# Use an official Python runtime as a parent image -FROM python:3.9-slim - -# Set the working directory in the container +FROM python:3.12-alpine WORKDIR /app +COPY requirements.txt . +RUN pip install -r requirements.txt -# Copy the current directory contents into the container at /app -COPY . 
/app - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - espeak-ng \ - libespeak-ng1 \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -# Install any needed packages specified in requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# Make port 5000 available to the world outside this container -EXPOSE 5000 - -# Run ttssend.py when the container launches -CMD ["python", "tts_server.py"] +COPY app /app +CMD ["python", "/app/app.py"] diff --git a/textToSpeech/app/app.py b/textToSpeech/app/app.py new file mode 100644 index 0000000..640fe5b --- /dev/null +++ b/textToSpeech/app/app.py @@ -0,0 +1,203 @@ +from flask import Flask, render_template, request, jsonify +from flask_socketio import SocketIO, emit +from threading import Thread, Lock +from queue import Queue +import tts +import os +from typing import List, Tuple +import logging + +# Global counter for sentence IDs +sentence_counter: int = 0 +counter_lock = Lock() + +# Add these global variables after the existing ones +recent_audio_chunks: dict[int, list] = {} # Store recent chunks +MAX_STORED_CHUNKS: int = 100 # Limit how many chunks we store + + +def split_into_sentences(text: str) -> List[str]: + """Split text into sentences using basic string splitting""" + # Split on common sentence endings + raw_sentences: List[str] = [] + for part in text.replace('!', '.').replace('?', '.').split('.'): + cleaned = part.strip() + if cleaned: # Only add non-empty sentences + raw_sentences.append(cleaned) + return raw_sentences + + +def generator_thread(input_queue: Queue, output_queue: Queue, tts_engine: tts.TTS) -> None: + # TODO port this to asyncio + while True: + item = input_queue.get() + if item is None: + break + sentence_id, sentence = item + audio_data = tts_engine.tts(sentence) + output_queue.put((sentence_id, audio_data)) + + +def streamer_thread(input_queue: Queue) -> None: + expected_id: int = 1 + buffer: dict[int, list] = {} + + while True: + item = input_queue.get() + 
if item is None: + break + + sentence_id, audio_data = item + # print(f"Got audio chunk {sentence_id}") + + # Reset expected_id if we're starting a new text generation + if sentence_id == 1: + expected_id = 1 + buffer.clear() # Clear any old buffered chunks + print("New text generation - resetting counters") + + # If this is the chunk we're waiting for, emit it + if sentence_id == expected_id: + try: + audio_list = list(audio_data) + # print(f"Emitting chunk {sentence_id} to clients") + socketio.emit('audio_stream', { + 'audio_data': audio_list, + 'sentence_id': sentence_id + }, namespace='/') + # print(f"Emitted chunk {sentence_id}") + expected_id += 1 + + # Process buffered chunks + while expected_id in buffer: + buffered_audio = buffer.pop(expected_id) + socketio.emit('audio_stream', { + 'audio_data': buffered_audio, + 'sentence_id': expected_id + }, namespace='/') + expected_id += 1 + + except Exception as e: + print(f"Error emitting audio: {str(e)}") + import traceback + traceback.print_exc() + + # If this chunk is for a future sentence, buffer it + elif sentence_id > expected_id: + buffer[sentence_id] = list(audio_data) + else: + print(f"Dropping late chunk for sentence {sentence_id}") + + +app: Flask = Flask(__name__) +app.config['SECRET_KEY'] = 'lklsa01lkJASD9012o3khj123l' +socketio: SocketIO = SocketIO( + app, + cors_allowed_origins="*", + logger=False, # Set to True to log hella verbose + engineio_logger=False, # Same with this one + ping_timeout=10, + ping_interval=5, + async_mode='threading', + reconnection=True, + reconnection_attempts=5, + reconnection_delay=1000 +) +input_queue: Queue = Queue() +output_queue: Queue = Queue() + + +@app.route('/') +def index(): + return render_template('index.html') + + +@app.route('/generate', methods=['POST']) +def generate(): + global sentence_counter + try: + data = request.get_json() + text: str = data.get('text', '') + + if not text: + return jsonify({'error': 'No text provided'}), 400 + + sentences: List[str] = 
split_into_sentences(text) + queue_items: List[Tuple[int, str]] = [] + + with counter_lock: + # Reset sentence counter if it's a new session + sentence_counter = 0 # Reset counter + for sentence in sentences: + sentence_counter += 1 + queue_item = (sentence_counter, sentence) + queue_items.append(queue_item) + input_queue.put(queue_item) + print(f"Queuing sentence {sentence_counter}: { + sentence[:30]}...") # Debug print + + return jsonify({ + 'message': 'Text queued successfully', + 'sentences': len(sentences), + 'queue_items': queue_items + }), 200 + + except Exception as e: + print(f"Error in generate endpoint: {str(e)}") + return jsonify({'error': str(e)}), 500 + + +@socketio.on('audio_data') +def handle_audio_data(data: bytes) -> None: + # Broadcast the received audio data to all connected clients + emit('audio_stream', data, broadcast=True) + + +# Add this new route to test Socket.IO +@socketio.on('connect') +def handle_connect(): + client_id = request.sid + print(f"New client connected: {client_id}") # More detailed connection log + socketio.emit('test', {'data': 'Test message'}) + + +# Add a health check route +# For docker healthchecks +@app.route('/health') +def health_check(): + return jsonify({'status': 'healthy'}), 200 + + +if __name__ == '__main__': + import logging + logging.getLogger('werkzeug').setLevel(logging.INFO) + logging.getLogger('engineio').setLevel(logging.INFO) + logging.getLogger('socketio').setLevel(logging.INFO) + + print("Starting server...") + + redis_url = os.getenv("REDIS_URL") + if not redis_url: + raise ValueError("REDIS_URL is not set") + + cache = tts.Cache(redis_url, max_size_mb=1000) + tts_narakeet = tts.Narakeet( + api_key=os.getenv("NARAKEET_API_KEY"), cache=cache) + + if os.getenv("DEBUG") == "True": + debug = True + else: + debug = False + + generator_thread = Thread(target=generator_thread, + args=(input_queue, output_queue, tts_narakeet)) + generator_thread.daemon = True # Make thread daemon + generator_thread.start() + 
+ streamer_thread = Thread(target=streamer_thread, args=(output_queue,)) + streamer_thread.daemon = True # Make thread daemon + streamer_thread.start() + + print("All threads started, running server...") + socketio.run(app, debug=debug, host='0.0.0.0', + port=5000, allow_unsafe_werkzeug=True) diff --git a/textToSpeech/app/static/js/audio.js b/textToSpeech/app/static/js/audio.js new file mode 100644 index 0000000..dfac5b6 --- /dev/null +++ b/textToSpeech/app/static/js/audio.js @@ -0,0 +1,149 @@ +const socket = io({ + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionAttempts: 5, + reconnectionDelay: 1000, + timeout: 10000, + autoConnect: true +}); + +console.log('Attempting to connect to server...'); + +let audioContext; +let isProcessing = false; +let audioQueue = []; +let expectedSentenceId = 1; + +socket.onAny((eventName, ...args) => { + console.log(`Received event: ${eventName}`, args); +}); + +async function initAudioContext() { + audioContext = new (window.AudioContext || window.webkitAudioContext)(); +} + +async function processAudioChunk(audioData, sentenceId) { + try { + console.log(`Processing audio chunk for sentence ${sentenceId}`); + const arrayBuffer = new Uint8Array(audioData).buffer; + const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); + + const source = audioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(audioContext.destination); + + // Add an event listener for when the audio finishes playing + source.onended = () => { + console.log(`Finished playing sentence ${sentenceId}`); + expectedSentenceId++; + isProcessing = false; + processQueuedAudio(); // Process next chunk if available + }; + + source.start(); + isProcessing = true; + + } catch (error) { + console.error('Error processing audio chunk:', error); + isProcessing = false; + processQueuedAudio(); // Try next chunk on error + } +} + +function processQueuedAudio() { + if (isProcessing || audioQueue.length === 0) return; + 
+ // Sort queue by sentence ID + audioQueue.sort((a, b) => a.sentenceId - b.sentenceId); + + // Process next chunk if it matches expected ID + const nextChunk = audioQueue[0]; + if (nextChunk.sentenceId === expectedSentenceId) { + audioQueue.shift(); // Remove from queue + processAudioChunk(nextChunk.audioData, nextChunk.sentenceId); + } +} + +// Socket.IO event handler +socket.on('audio_stream', async (data) => { + console.log('Received audio_stream event:', { + sentenceId: data.sentence_id, + dataLength: data.audio_data.length + }); + + if (!audioContext) { + console.log('Initializing audio context'); + await initAudioContext(); + } + + const audioData = new Uint8Array(data.audio_data); + const sentenceId = data.sentence_id; + + // Reset state if this is the start of a new generation + if (sentenceId === 1) { + console.log('New text generation - resetting client state'); + expectedSentenceId = 1; + audioQueue = []; + isProcessing = false; + } + + console.log(`Queueing audio chunk ${sentenceId}`); + + // Queue the audio chunk + audioQueue.push({ + audioData: audioData, + sentenceId: sentenceId, + timestamp: Date.now() + }); + + console.log(`Current queue length: ${audioQueue.length}`); + // Try to process queued audio + processQueuedAudio(); +}); + +// Initialize audio context on user interaction +document.addEventListener('click', async () => { + if (!audioContext) { + await initAudioContext(); + } + if (audioContext.state === 'suspended') { + await audioContext.resume(); + } +}); + +socket.on('test', (data) => { + console.log('Received test message:', data); + status.textContent = "Status: Received test message"; +}); + +socket.on('connecting', () => { + console.log('Attempting to connect...'); + status.textContent = "Status: Attempting to connect..."; +}); + +socket.on('connect_error', (error) => { + console.error('Connection error:', error); + console.log('Transport type:', socket.io.engine.transport.name); + status.textContent = `Status: Connection error - 
${error.message}`; +}); + +socket.on('connect_timeout', () => { + console.error('Connection timeout'); + status.textContent = "Status: Connection timeout"; +}); + +socket.on('reconnect_attempt', (attemptNumber) => { + console.log(`Reconnection attempt ${attemptNumber}`); + status.textContent = `Status: Reconnection attempt ${attemptNumber}`; +}); + +socket.on('connect', () => { + console.log('Connected to server with ID:', socket.id); + console.log('Transport type:', socket.io.engine.transport.name); + status.textContent = "Status: Connected to server. Click anywhere to enable audio."; +}); + +socket.on('disconnect', () => { + console.log('Disconnected from server'); + status.textContent = "Status: Disconnected from server"; +}); \ No newline at end of file diff --git a/textToSpeech/app/templates/index.html b/textToSpeech/app/templates/index.html new file mode 100644 index 0000000..445476f --- /dev/null +++ b/textToSpeech/app/templates/index.html @@ -0,0 +1,73 @@ + + +
+