use gpt-4o-mini-tts instead of n8n

parent 0d5c27e2
...@@ -61,16 +61,18 @@ services: ...@@ -61,16 +61,18 @@ services:
- minio - minio
voice-agent: voice-agent:
build: ./always # path to your Dockerfile folder build: ./voice_agent # path to your Dockerfile folder
container_name: voice-agent container_name: voice-agent
ports: ports:
- "8000:8000" # Expose the FastAPI server - "8000:8000" # Expose the FastAPI server
restart: on-failure restart: always
environment: environment:
MINIO_ENDPOINT: "http://minio:9000" MINIO_ENDPOINT: "http://minio:9000"
MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}" MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}"
MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}" MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
N8N_WEBHOOK_URL: "${N8N_WEBHOOK_URL}" N8N_WEBHOOK_URL: "${N8N_WEBHOOK_URL}"
OPENAI_API_KEY: "${OPENAI_API_KEY}"
MINIO_BUCKET: "${MINIO_BUCKET}"
volumes: volumes:
- ./uploads:/app/uploads - ./uploads:/app/uploads
depends_on: depends_on:
......
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# Configuration Management # Configuration Management
...@@ -10,6 +14,7 @@ class AppConfig: ...@@ -10,6 +14,7 @@ class AppConfig:
minio_secret_key: str minio_secret_key: str
minio_bucket: str minio_bucket: str
n8n_webhook_url: str n8n_webhook_url: str
openai_api_key: str
@classmethod @classmethod
def from_env(cls) -> 'AppConfig': def from_env(cls) -> 'AppConfig':
...@@ -17,6 +22,7 @@ class AppConfig: ...@@ -17,6 +22,7 @@ class AppConfig:
minio_endpoint=os.getenv("MINIO_ENDPOINT", "http://minio:9000"), minio_endpoint=os.getenv("MINIO_ENDPOINT", "http://minio:9000"),
minio_access_key=os.getenv("MINIO_ACCESS_KEY"), minio_access_key=os.getenv("MINIO_ACCESS_KEY"),
minio_secret_key=os.getenv("MINIO_SECRET_KEY"), minio_secret_key=os.getenv("MINIO_SECRET_KEY"),
minio_bucket="coversation", minio_bucket=os.getenv("MINIO_BUCKET"),
n8n_webhook_url=os.getenv("N8N_WEBHOOK_URL") n8n_webhook_url=os.getenv("N8N_WEBHOOK_URL"),
openai_api_key=os.getenv("OPENAI_API_KEY")
) )
\ No newline at end of file
...@@ -33,11 +33,10 @@ class DIContainer: ...@@ -33,11 +33,10 @@ class DIContainer:
self.response_manager = ResponseManager() self.response_manager = ResponseManager()
self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket) self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket)
self.chat_service = ChatService(self.storage_repo, self.webhook_client, self.response_manager, self.config) self.chat_service = ChatService(self.storage_repo, self.webhook_client, self.response_manager, self.config)
self.webhook_service = WebhookService(self.response_manager, self.audio_service) self.webhook_service = WebhookService(self.response_manager, self.storage_repo, self.config.minio_bucket)
self.response_service = ResponseService(self.response_manager, self.audio_service) self.response_service = ResponseService(self.response_manager, self.audio_service)
self.health_service = HealthService(self.storage_repo, self.config) self.health_service = HealthService(self.storage_repo, self.config)
# FastAPI App Factory # FastAPI App Factory
def create_app() -> FastAPI: def create_app() -> FastAPI:
app = FastAPI(title="Unified Chat API") app = FastAPI(title="Unified Chat API")
......
...@@ -5,3 +5,4 @@ soundfile ...@@ -5,3 +5,4 @@ soundfile
fastapi fastapi
uvicorn[standard] uvicorn[standard]
python-multipart python-multipart
openai
...@@ -17,14 +17,21 @@ class AudioService: ...@@ -17,14 +17,21 @@ class AudioService:
def get_audio_file(self, filename: str) -> str: def get_audio_file(self, filename: str) -> str:
try: try:
# Add the audio/ prefix to match the MinIO structure
minio_file_path = f"audio/{filename}"
print(f"Attempting to download from MinIO: bucket={self.bucket}, path={minio_file_path}")
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
self.storage_repo.download_file(self.bucket, filename, temp_file.name) self.storage_repo.download_file(self.bucket, minio_file_path, temp_file.name)
temp_file.close() temp_file.close()
print(f"Successfully downloaded audio file: {minio_file_path}")
return temp_file.name return temp_file.name
except ClientError as e: except ClientError as e:
print(f"MinIO ClientError: {e}") print(f"MinIO ClientError: {e}")
print(f"Failed to find: bucket={self.bucket}, path=audio/{filename}")
raise HTTPException(status_code=404, detail=f"Audio file '{filename}' not found.") raise HTTPException(status_code=404, detail=f"Audio file '{filename}' not found.")
except Exception as e: except Exception as e:
print(f"An error occurred: {e}") print(f"An error occurred: {e}")
raise HTTPException(status_code=500, detail=f"An error occurred: {e}") raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
\ No newline at end of file
...@@ -3,12 +3,13 @@ from typing import Optional ...@@ -3,12 +3,13 @@ from typing import Optional
class ResponseManager: class ResponseManager:
def __init__(self): def __init__(self):
self._latest_response = {"text": None, "audio_file_path": None, "timestamp": 0} self._latest_response = {"text": None, "audio_filename": None, "timestamp": 0}
def store_response(self, text: str, audio_file_path: Optional[str] = None) -> None: def store_response(self, text: str, audio_filename: Optional[str] = None) -> None:
"""Store response with audio filename instead of file path"""
self._latest_response = { self._latest_response = {
"text": text, "text": text,
"audio_file_path": audio_file_path, "audio_filename": audio_filename,
"timestamp": time.time() "timestamp": time.time()
} }
...@@ -16,7 +17,7 @@ class ResponseManager: ...@@ -16,7 +17,7 @@ class ResponseManager:
return self._latest_response.copy() return self._latest_response.copy()
def clear_response(self) -> None: def clear_response(self) -> None:
self._latest_response = {"text": None, "audio_file_path": None, "timestamp": 0} self._latest_response = {"text": None, "audio_filename": None, "timestamp": 0}
def is_response_fresh(self, max_age_seconds: int = 300) -> bool: def is_response_fresh(self, max_age_seconds: int = 300) -> bool:
return (self._latest_response["text"] and return (self._latest_response["text"] and
......
...@@ -21,8 +21,9 @@ class ResponseService: ...@@ -21,8 +21,9 @@ class ResponseService:
response_data = self.response_manager.get_response() response_data = self.response_manager.get_response()
if response_data["audio_file_path"]: if response_data["audio_filename"]:
file_path = response_data["audio_file_path"] # Download audio file from MinIO using filename
file_path = self.audio_service.get_audio_file(response_data["audio_filename"])
response_text = response_data["text"] response_text = response_data["text"]
self.response_manager.clear_response() self.response_manager.clear_response()
......
...@@ -4,28 +4,102 @@ from typing import Optional ...@@ -4,28 +4,102 @@ from typing import Optional
from enum import Enum from enum import Enum
import sys import sys
import os import os
import time
import tempfile
from openai import OpenAI
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from core import ResponseStatus from core import ResponseStatus
from schemas import WebhookResponse from schemas import WebhookResponse
from services.response_manager import ResponseManager from services.response_manager import ResponseManager
from services.audio_service import AudioService from repositories import StorageRepository
class WebhookService: class WebhookService:
def __init__(self, response_manager: ResponseManager, audio_service: AudioService): def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository, bucket: str):
self.response_manager = response_manager self.response_manager = response_manager
self.audio_service = audio_service self.storage_repo = storage_repo
self.bucket = bucket
# Initialize OpenAI client
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
print("Warning: OPENAI_API_KEY not found. TTS functionality will be disabled.")
self.openai_client = None
else:
self.openai_client = OpenAI(api_key=openai_api_key)
def generate_audio_from_text(self, text: str) -> str:
    """Generate speech for ``text`` with OpenAI TTS and upload it to MinIO.

    The audio is streamed into a local temporary ``.mp3`` file, uploaded to
    ``self.bucket`` under the ``audio/`` prefix, and the temporary file is
    removed afterwards whether or not the upload succeeded.

    Args:
        text: The text to synthesize.

    Returns:
        The bare object filename (without the ``audio/`` prefix); the audio
        service adds the prefix again when it downloads the file.

    Raises:
        HTTPException: 500 if no OpenAI client is configured, or if
            synthesis or the upload fails.
    """
    if not self.openai_client:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    # Stays None until the temp file exists, so ``finally`` knows whether
    # there is anything to delete.
    temp_file_path = None
    try:
        # Only the path is needed; the handle is closed immediately so the
        # OpenAI client can write to the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_file_path = temp_file.name

        print(f"Generating TTS audio for text: {text[:50]}...")

        # "alloy" is one of the built-in voices; see the OpenAI
        # text-to-speech guide for the voices this model currently supports.
        with self.openai_client.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice="alloy",
            input=text,
            response_format="mp3",
        ) as response:
            response.stream_to_file(temp_file_path)

        # A second-resolution timestamp alone collides when two responses are
        # generated within the same second, silently overwriting the earlier
        # object; the random suffix keeps every object name unique.
        timestamp = int(time.time())
        filename = f"tts_response_{timestamp}_{os.urandom(4).hex()}.mp3"

        # The full path in MinIO will be audio/filename
        minio_file_path = f"audio/{filename}"

        print(f"Uploading generated audio to MinIO: {minio_file_path}")

        with open(temp_file_path, 'rb') as audio_file:
            self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)

        print(f"Successfully generated and uploaded TTS audio: {filename}")

        # Return just the filename, the audio service will add the audio/ prefix
        return filename
    except Exception as e:
        print(f"Error generating TTS audio: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}") from e
    finally:
        # Best-effort cleanup on both success and failure: a leftover temp
        # file must not turn a successful upload into an error.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
                print(f"Cleaned up temporary file: {temp_file_path}")
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")
def process_webhook_response(self, response: WebhookResponse) -> dict: def process_webhook_response(self, response: WebhookResponse) -> dict:
"""Process webhook response from n8n and generate TTS audio"""
print("Received webhook data from n8n:", response.dict()) print("Received webhook data from n8n:", response.dict())
if response.agent_responded == 'yes' and response.agent_response: if response.agent_responded == 'yes' and response.agent_response:
if response.filename: try:
audio_file_path = self.audio_service.get_audio_file(response.filename) # Generate audio from the text response
self.response_manager.store_response(response.agent_response, audio_file_path) audio_filename = self.generate_audio_from_text(response.agent_response)
print("Agent response stored successfully.")
else: # Store response with generated audio filename (just the filename, not the path)
self.response_manager.store_response(response.agent_response, audio_filename)
print("Agent response with generated TTS audio stored successfully.")
except HTTPException:
# Re-raise HTTP exceptions
raise
except Exception as e:
print(f"Error generating TTS audio, storing text-only response: {e}")
# Fallback to text-only response if audio generation fails
self.response_manager.store_response(response.agent_response) self.response_manager.store_response(response.agent_response)
print("Agent text response stored successfully.") print("Stored text-only response as fallback.")
else:
print("No valid agent response received from webhook.")
return {"status": ResponseStatus.SUCCESS, "message": "Webhook received and processed successfully."} return {"status": ResponseStatus.SUCCESS, "message": "Webhook received and processed successfully."}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment