Use gpt-4o-mini-tts instead of n8n

parent 0d5c27e2
......@@ -61,16 +61,18 @@ services:
- minio
voice-agent:
build: ./always # path to your Dockerfile folder
build: ./voice_agent # path to your Dockerfile folder
container_name: voice-agent
ports:
- "8000:8000" # Expose the FastAPI server
restart: on-failure
restart: always
environment:
MINIO_ENDPOINT: "http://minio:9000"
MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}"
MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
N8N_WEBHOOK_URL: "${N8N_WEBHOOK_URL}"
OPENAI_API_KEY: "${OPENAI_API_KEY}"
MINIO_BUCKET: "${MINIO_BUCKET}"
volumes:
- ./uploads:/app/uploads
depends_on:
......
import os
from dataclasses import dataclass
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# Configuration Management
......@@ -10,6 +14,7 @@ class AppConfig:
minio_secret_key: str
minio_bucket: str
n8n_webhook_url: str
openai_api_key: str
@classmethod
def from_env(cls) -> 'AppConfig':
......@@ -17,6 +22,7 @@ class AppConfig:
minio_endpoint=os.getenv("MINIO_ENDPOINT", "http://minio:9000"),
minio_access_key=os.getenv("MINIO_ACCESS_KEY"),
minio_secret_key=os.getenv("MINIO_SECRET_KEY"),
minio_bucket="coversation",
n8n_webhook_url=os.getenv("N8N_WEBHOOK_URL")
minio_bucket=os.getenv("MINIO_BUCKET"),
n8n_webhook_url=os.getenv("N8N_WEBHOOK_URL"),
openai_api_key=os.getenv("OPENAI_API_KEY")
)
\ No newline at end of file
......@@ -33,11 +33,10 @@ class DIContainer:
self.response_manager = ResponseManager()
self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket)
self.chat_service = ChatService(self.storage_repo, self.webhook_client, self.response_manager, self.config)
self.webhook_service = WebhookService(self.response_manager, self.audio_service)
self.webhook_service = WebhookService(self.response_manager, self.storage_repo, self.config.minio_bucket)
self.response_service = ResponseService(self.response_manager, self.audio_service)
self.health_service = HealthService(self.storage_repo, self.config)
# FastAPI App Factory
def create_app() -> FastAPI:
app = FastAPI(title="Unified Chat API")
......
......@@ -5,3 +5,4 @@ soundfile
fastapi
uvicorn[standard]
python-multipart
openai
......@@ -17,14 +17,21 @@ class AudioService:
def get_audio_file(self, filename: str) -> str:
    """Download ``audio/<filename>`` from the bucket into a local temp file.

    Args:
        filename: Object name without the ``audio/`` prefix; the prefix is
            added here to match the MinIO structure.

    Returns:
        Path of a temporary ``.mp3`` file containing the downloaded audio.
        The file is created with ``delete=False`` and is not removed by this
        method on success.

    Raises:
        HTTPException: 404 when the storage call raises ``ClientError``,
            500 for any other failure.
    """
    import os  # imported locally; only needed for failure cleanup

    # Add the audio/ prefix to match the MinIO structure
    minio_file_path = f"audio/{filename}"
    temp_path = None
    downloaded = False
    try:
        print(f"Attempting to download from MinIO: bucket={self.bucket}, path={minio_file_path}")
        # Reserve a path and close the handle before the download writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_path = temp_file.name
        # Single download, from the prefixed key only.
        self.storage_repo.download_file(self.bucket, minio_file_path, temp_path)
        downloaded = True
        print(f"Successfully downloaded audio file: {minio_file_path}")
        return temp_path
    except ClientError as e:
        print(f"MinIO ClientError: {e}")
        print(f"Failed to find: bucket={self.bucket}, path=audio/{filename}")
        raise HTTPException(status_code=404, detail=f"Audio file '{filename}' not found.") from e
    except Exception as e:
        print(f"An error occurred: {e}")
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}") from e
    finally:
        # A failed download must not leave the delete=False temp file behind.
        if not downloaded and temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
\ No newline at end of file
......@@ -3,12 +3,13 @@ from typing import Optional
class ResponseManager:
def __init__(self):
self._latest_response = {"text": None, "audio_file_path": None, "timestamp": 0}
self._latest_response = {"text": None, "audio_filename": None, "timestamp": 0}
def store_response(self, text: str, audio_file_path: Optional[str] = None) -> None:
def store_response(self, text: str, audio_filename: Optional[str] = None) -> None:
"""Store response with audio filename instead of file path"""
self._latest_response = {
"text": text,
"audio_file_path": audio_file_path,
"audio_filename": audio_filename,
"timestamp": time.time()
}
......@@ -16,8 +17,8 @@ class ResponseManager:
return self._latest_response.copy()
def clear_response(self) -> None:
self._latest_response = {"text": None, "audio_file_path": None, "timestamp": 0}
self._latest_response = {"text": None, "audio_filename": None, "timestamp": 0}
def is_response_fresh(self, max_age_seconds: int = 300) -> bool:
return (self._latest_response["text"] and
(time.time() - self._latest_response["timestamp"] < max_age_seconds))
(time.time() - self._latest_response["timestamp"] < max_age_seconds))
\ No newline at end of file
......@@ -21,8 +21,9 @@ class ResponseService:
response_data = self.response_manager.get_response()
if response_data["audio_file_path"]:
file_path = response_data["audio_file_path"]
if response_data["audio_filename"]:
# Download audio file from MinIO using filename
file_path = self.audio_service.get_audio_file(response_data["audio_filename"])
response_text = response_data["text"]
self.response_manager.clear_response()
......@@ -44,4 +45,4 @@ class ResponseService:
"status": ResponseStatus.SUCCESS,
"message": "Text response available.",
"text": response_text
}
}
\ No newline at end of file
......@@ -4,28 +4,102 @@ from typing import Optional
from enum import Enum
import sys
import os
import time
import tempfile
from openai import OpenAI
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from core import ResponseStatus
from schemas import WebhookResponse
from services.response_manager import ResponseManager
from services.audio_service import AudioService
from repositories import StorageRepository
class WebhookService:
def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository, bucket: str):
    """Wire up the collaborators and, when possible, an OpenAI client.

    Args:
        response_manager: Store that receives the processed agent response.
        storage_repo: Repository used to upload generated audio.
        bucket: Name of the bucket the audio is uploaded to.

    The OpenAI client is built from the ``OPENAI_API_KEY`` environment
    variable. When the variable is unset or empty, ``self.openai_client`` is
    ``None`` and a warning is printed.
    """
    self.response_manager = response_manager
    self.storage_repo = storage_repo
    self.bucket = bucket

    # Initialize OpenAI client
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if openai_api_key:
        self.openai_client = OpenAI(api_key=openai_api_key)
    else:
        print("Warning: OPENAI_API_KEY not found. TTS functionality will be disabled.")
        self.openai_client = None
def generate_audio_from_text(self, text: str) -> str:
    """Generate audio from text using OpenAI TTS and upload it to MinIO.

    Args:
        text: The text to synthesize.

    Returns:
        The bare object filename (without the ``audio/`` prefix) under which
        the MP3 was uploaded to ``self.bucket``.

    Raises:
        HTTPException: 500 when no OpenAI client is configured, or when
            synthesis or upload fails for any reason.
    """
    import uuid  # imported locally; only used to make object names unique

    if not self.openai_client:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    temp_file_path = None
    try:
        # Reserve a temporary path; the handle is closed so the SDK can write to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_file_path = temp_file.name

        print(f"Generating TTS audio for text: {text[:50]}...")

        # Generate audio using OpenAI TTS
        with self.openai_client.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice="alloy",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
            input=text,
            response_format="mp3",
        ) as response:
            response.stream_to_file(temp_file_path)

        # A second-resolution timestamp alone can collide when two responses
        # arrive within the same second, so a random suffix is appended.
        timestamp = int(time.time())
        filename = f"tts_response_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"

        # The full path in MinIO will be audio/filename
        minio_file_path = f"audio/{filename}"
        print(f"Uploading generated audio to MinIO: {minio_file_path}")

        with open(temp_file_path, 'rb') as audio_file:
            self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)

        print(f"Successfully generated and uploaded TTS audio: {filename}")

        # Return just the filename; the audio/ prefix is not part of it
        return filename
    except Exception as e:
        print(f"Error generating TTS audio: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}") from e
    finally:
        # Single cleanup point for both the success and the failure path.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
            print(f"Cleaned up temporary file: {temp_file_path}")
def process_webhook_response(self, response: WebhookResponse) -> dict:
    """Process a webhook response from n8n and generate TTS audio for it.

    When the payload carries an agent response, audio is generated from the
    text and the response is stored together with the audio filename. If
    audio generation fails, the text is still stored without audio so the
    agent's answer is not lost.

    Args:
        response: Parsed webhook payload; ``agent_responded`` and
            ``agent_response`` are read here.

    Returns:
        A dict with ``status`` and ``message`` keys reporting that the
        webhook was received and processed.
    """
    print("Received webhook data from n8n:", response.dict())

    if response.agent_responded == 'yes' and response.agent_response:
        try:
            # Generate audio from the text response
            audio_filename = self.generate_audio_from_text(response.agent_response)
        except Exception as e:
            # generate_audio_from_text reports every failure as an
            # HTTPException, so re-raising that type would make this
            # fallback unreachable; all failures are handled here instead.
            print(f"Error generating TTS audio, storing text-only response: {e}")
            self.response_manager.store_response(response.agent_response)
            print("Stored text-only response as fallback.")
        else:
            # Store just the filename, not a local path
            self.response_manager.store_response(response.agent_response, audio_filename)
            print("Agent response with generated TTS audio stored successfully.")
    else:
        print("No valid agent response received from webhook.")

    return {"status": ResponseStatus.SUCCESS, "message": "Webhook received and processed successfully."}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment