whisper in backend and open ai service class

parent ade545c7
...@@ -7,5 +7,5 @@ COPY requirements.txt . ...@@ -7,5 +7,5 @@ COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY . . COPY . .
#keep the container running always
CMD ["python", "main.py"] CMD ["sh", "-c", "while true; do sleep 30; done"]
from fastapi import UploadFile from fastapi import UploadFile, HTTPException
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import sys import sys
import io
sys.path.append("../") sys.path.append("../")
from repositories import WebhookClient, StorageRepository from repositories import WebhookClient, StorageRepository
from core import MessageType, ResponseStatus from core import MessageType, ResponseStatus
from services import OpenAIService
class MessageHandler(ABC): class MessageHandler(ABC):
...@@ -11,19 +13,71 @@ class MessageHandler(ABC): ...@@ -11,19 +13,71 @@ class MessageHandler(ABC):
def handle(self, **kwargs) -> dict: def handle(self, **kwargs) -> dict:
pass pass
class AudioMessageHandler(): class AudioMessageHandler():
def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient, bucket: str): def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient,
bucket: str, openai_service: OpenAIService):
self.storage_repo = storage_repo self.storage_repo = storage_repo
self.webhook_client = webhook_client self.webhook_client = webhook_client
self.bucket = bucket self.bucket = bucket
self.openai_service = openai_service
def handle(self, file: UploadFile, **kwargs) -> dict: def handle(self, file: UploadFile, **kwargs) -> dict:
file_path = f"audio/{file.filename}" try:
self.storage_repo.upload_file(file.file, self.bucket, file_path) # Read file content
print(f"Uploaded {file.filename} to MinIO.") file.file.seek(0)
file_content = file.file.read()
payload = {"type": MessageType.AUDIO, "filename": file.filename}
self.webhook_client.send_webhook(payload) if not file_content:
raise HTTPException(status_code=400, detail="Empty audio file received")
return {"status": ResponseStatus.SUCCESS, "message": "Message received and forwarded to n8n."}
# Upload original file to MinIO for backup
file_path = f"audio/{file.filename}"
file_stream = io.BytesIO(file_content)
self.storage_repo.upload_file(file_stream, self.bucket, file_path)
print(f"Uploaded {file.filename} to MinIO at {file_path}")
# Try to transcribe the audio using OpenAI service
try:
transcribed_text = self.openai_service.transcribe_audio(file_content, file.filename)
# Send transcribed text to n8n
payload = {
"type": MessageType.AUDIO,
"message": transcribed_text,
}
self.webhook_client.send_webhook(payload)
print(f"Sent transcribed text to n8n: {transcribed_text[:100]}...")
return {
"status": ResponseStatus.SUCCESS,
"message": "Audio transcribed and forwarded to n8n.",
"transcription": transcribed_text
}
except Exception as transcription_error:
print(f"Transcription failed: {transcription_error}")
# Fallback: send filename to n8n for processing there
payload = {
"type": MessageType.AUDIO,
"filename": file.filename,
"message": "تم تسجيل رسالة صوتية - فشل في التفريغ المحلي",
"transcription_source": "fallback",
"error": str(transcription_error)
}
self.webhook_client.send_webhook(payload)
print(f"Sent filename to n8n as fallback: {file.filename}")
return {
"status": ResponseStatus.SUCCESS,
"message": "Audio uploaded, transcription failed - sent to n8n for processing.",
"filename": file.filename,
"transcription_error": str(transcription_error)
}
except Exception as e:
print(f"Error processing audio message: {e}")
raise HTTPException(status_code=500, detail=f"Failed to process audio message: {str(e)}")
\ No newline at end of file
...@@ -22,18 +22,33 @@ from schemas import WebhookResponse, TextMessage ...@@ -22,18 +22,33 @@ from schemas import WebhookResponse, TextMessage
from repositories import StorageRepository, MinIOStorageRepository from repositories import StorageRepository, MinIOStorageRepository
from repositories import WebhookClient, N8NWebhookClient from repositories import WebhookClient, N8NWebhookClient
from handlers import AudioMessageHandler, TextMessageHandler from handlers import AudioMessageHandler, TextMessageHandler
from services import AudioService, ChatService, HealthService, ResponseService, ResponseManager, WebhookService from services import AudioService, ChatService, HealthService, ResponseService, ResponseManager, WebhookService, OpenAIService
# Dependency Container
class DIContainer: class DIContainer:
def __init__(self): def __init__(self):
self.config = AppConfig.from_env() self.config = AppConfig.from_env()
self.storage_repo = MinIOStorageRepository(self.config) self.storage_repo = MinIOStorageRepository(self.config)
self.webhook_client = N8NWebhookClient(self.config.n8n_webhook_url) self.webhook_client = N8NWebhookClient(self.config.n8n_webhook_url)
self.response_manager = ResponseManager() self.response_manager = ResponseManager()
# Initialize OpenAI service
self.openai_service = OpenAIService()
# Updated services to use OpenAI service
self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket) self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket)
self.chat_service = ChatService(self.storage_repo, self.webhook_client, self.response_manager, self.config) self.chat_service = ChatService(
self.webhook_service = WebhookService(self.response_manager, self.storage_repo, self.config.minio_bucket) self.storage_repo,
self.webhook_client,
self.response_manager,
self.config,
self.openai_service # Pass OpenAI service
)
self.webhook_service = WebhookService(
self.response_manager,
self.storage_repo,
self.config.minio_bucket,
self.openai_service # Pass OpenAI service
)
self.response_service = ResponseService(self.response_manager, self.audio_service) self.response_service = ResponseService(self.response_manager, self.audio_service)
self.health_service = HealthService(self.storage_repo, self.config) self.health_service = HealthService(self.storage_repo, self.config)
......
...@@ -4,3 +4,4 @@ from .health_service import HealthService ...@@ -4,3 +4,4 @@ from .health_service import HealthService
from .response_service import ResponseService from .response_service import ResponseService
from .response_manager import ResponseManager from .response_manager import ResponseManager
from .webhook_service import WebhookService from .webhook_service import WebhookService
from .openai_service import OpenAIService
...@@ -5,21 +5,28 @@ import os ...@@ -5,21 +5,28 @@ import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from core import MessageType, AppConfig from core import MessageType, AppConfig
from repositories import StorageRepository, WebhookClient from repositories import StorageRepository, WebhookClient
from handlers import AudioMessageHandler, TextMessageHandler
from services.response_manager import ResponseManager from services.response_manager import ResponseManager
from services.openai_service import OpenAIService
class ChatService: class ChatService:
def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient, def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient,
response_manager: ResponseManager, config: AppConfig): response_manager: ResponseManager, config: AppConfig, openai_service: OpenAIService):
from handlers import AudioMessageHandler, TextMessageHandler
self.storage_repo = storage_repo self.storage_repo = storage_repo
self.webhook_client = webhook_client self.webhook_client = webhook_client
self.response_manager = response_manager self.response_manager = response_manager
self.config = config self.config = config
self.openai_service = openai_service
# Message handlers # Message handlers with OpenAI service dependency
self.handlers = { self.handlers = {
MessageType.AUDIO: AudioMessageHandler(storage_repo, webhook_client, config.minio_bucket), MessageType.AUDIO: AudioMessageHandler(
storage_repo,
webhook_client,
config.minio_bucket,
openai_service # Pass OpenAI service
),
MessageType.TEXT: TextMessageHandler(webhook_client) MessageType.TEXT: TextMessageHandler(webhook_client)
} }
...@@ -31,4 +38,4 @@ class ChatService: ...@@ -31,4 +38,4 @@ class ChatService:
elif text: elif text:
return self.handlers[MessageType.TEXT].handle(text=text) return self.handlers[MessageType.TEXT].handle(text=text)
else: else:
raise HTTPException(status_code=400, detail="No text or audio file provided.") raise HTTPException(status_code=400, detail="No text or audio file provided.")
\ No newline at end of file
import os
import time
import tempfile
import io
from typing import Optional
from fastapi import HTTPException
from openai import OpenAI
class OpenAIService:
"""Service class for handling OpenAI API operations (TTS and Whisper)"""
def __init__(self):
self.api_key = os.getenv("OPENAI_API_KEY")
if not self.api_key:
print("Warning: OPENAI_API_KEY not found. OpenAI services will be disabled.")
self.client = None
else:
self.client = OpenAI(api_key=self.api_key)
def is_available(self) -> bool:
"""Check if OpenAI service is available"""
return self.client is not None
def transcribe_audio(self, file_content: bytes, filename: str, language: Optional[str] = "ar") -> str:
"""
Transcribe audio using OpenAI Whisper
Args:
file_content: Audio file content as bytes
filename: Original filename for context
language: Language code (optional, defaults to Arabic)
Returns:
Transcribed text
Raises:
HTTPException: If transcription fails or service unavailable
"""
if not self.is_available():
raise HTTPException(status_code=500, detail="OpenAI service not available")
try:
# Create file-like object for the API
audio_file = io.BytesIO(file_content)
audio_file.name = filename
print(f"Transcribing audio: {filename}")
# Call Whisper API
transcript = self.client.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
language=language if language else None # Auto-detect if None
)
transcribed_text = transcript.text.strip()
if not transcribed_text:
raise ValueError("Empty transcription result")
print(f"Transcription successful: {transcribed_text[:100]}...")
return transcribed_text
except Exception as e:
print(f"Error during transcription: {e}")
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
def generate_speech(self, text: str, voice: str = "alloy", model: str = "tts-1") -> str:
"""
Generate speech from text using OpenAI TTS
Args:
text: Text to convert to speech
voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
model: TTS model to use (tts-1 or tts-1-hd)
Returns:
Path to temporary file containing the generated audio
Raises:
HTTPException: If TTS generation fails or service unavailable
"""
if not self.is_available():
raise HTTPException(status_code=500, detail="OpenAI service not available")
temp_file_path = None
try:
# Create temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
temp_file_path = temp_file.name
temp_file.close()
print(f"Generating TTS audio: {text[:50]}...")
# Generate audio using OpenAI TTS
with self.client.audio.speech.with_streaming_response.create(
model=model,
voice=voice,
input=text,
response_format="mp3"
) as response:
response.stream_to_file(temp_file_path)
print(f"TTS generation successful, saved to: {temp_file_path}")
return temp_file_path
except Exception as e:
# Clean up temp file on error
if temp_file_path and os.path.exists(temp_file_path):
os.unlink(temp_file_path)
print(f"Error during TTS generation: {e}")
raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
def cleanup_temp_file(self, file_path: str) -> None:
"""Clean up temporary file"""
if file_path and os.path.exists(file_path):
try:
os.unlink(file_path)
print(f"Cleaned up temporary file: {file_path}")
except Exception as e:
print(f"Warning: Could not clean up temp file {file_path}: {e}")
\ No newline at end of file
from fastapi import HTTPException from fastapi import HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
from enum import Enum
import sys import sys
import os import os
import time import time
import tempfile
from openai import OpenAI
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from core import ResponseStatus from core import ResponseStatus
from schemas import WebhookResponse from schemas import WebhookResponse
from services.response_manager import ResponseManager from services.response_manager import ResponseManager
from services.openai_service import OpenAIService
from repositories import StorageRepository from repositories import StorageRepository
class WebhookService: class WebhookService:
def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository, bucket: str): def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository,
bucket: str, openai_service: OpenAIService):
self.response_manager = response_manager self.response_manager = response_manager
self.storage_repo = storage_repo self.storage_repo = storage_repo
self.bucket = bucket self.bucket = bucket
self.openai_service = openai_service
# Initialize OpenAI client
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
print("Warning: OPENAI_API_KEY not found. TTS functionality will be disabled.")
self.openai_client = None
else:
self.openai_client = OpenAI(api_key=openai_api_key)
def generate_audio_from_text(self, text: str) -> str: def generate_and_upload_audio(self, text: str) -> str:
"""Generate audio from text using OpenAI TTS and upload to MinIO""" """Generate audio from text and upload to MinIO, return filename"""
if not self.openai_client:
raise HTTPException(status_code=500, detail="OpenAI API key not configured")
try: try:
# Create temporary file for audio # Generate audio using OpenAI service
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") temp_file_path = self.openai_service.generate_speech(text)
temp_file_path = temp_file.name
temp_file.close()
print(f"Generating TTS audio for text: {text[:50]}...") # Generate unique filename for MinIO
# Generate audio using OpenAI TTS
with self.openai_client.audio.speech.with_streaming_response.create(
model="gpt-4o-mini-tts",
voice="alloy", # Available voices: alloy, echo, fable, onyx, nova, shimmer
input=text,
response_format="mp3"
) as response:
response.stream_to_file(temp_file_path)
# Generate unique filename for MinIO
timestamp = int(time.time()) timestamp = int(time.time())
filename = f"tts_response_{timestamp}.mp3" filename = f"tts_response_{timestamp}.mp3"
# The full path in MinIO will be audio/filename
minio_file_path = f"audio/{filename}" minio_file_path = f"audio/{filename}"
print(f"Uploading generated audio to MinIO: {minio_file_path}") print(f"Uploading generated audio to MinIO: {minio_file_path}")
...@@ -62,20 +38,13 @@ class WebhookService: ...@@ -62,20 +38,13 @@ class WebhookService:
self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path) self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)
# Clean up temporary file # Clean up temporary file
if os.path.exists(temp_file_path): self.openai_service.cleanup_temp_file(temp_file_path)
os.unlink(temp_file_path)
print(f"Cleaned up temporary file: {temp_file_path}")
print(f"Successfully generated and uploaded TTS audio: {filename}") print(f"Successfully generated and uploaded TTS audio: {filename}")
# Return just the filename, the audio service will add the audio/ prefix
return filename return filename
except Exception as e: except Exception as e:
# Clean up temporary file in case of error print(f"Error generating and uploading audio: {e}")
if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
os.unlink(temp_file_path)
print(f"Error generating TTS audio: {e}")
raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}")
def process_webhook_response(self, response: WebhookResponse) -> dict: def process_webhook_response(self, response: WebhookResponse) -> dict:
...@@ -84,10 +53,10 @@ class WebhookService: ...@@ -84,10 +53,10 @@ class WebhookService:
if response.agent_responded == 'yes' and response.agent_response: if response.agent_responded == 'yes' and response.agent_response:
try: try:
# Generate audio from the text response # Generate audio from the text response using OpenAI service
audio_filename = self.generate_audio_from_text(response.agent_response) audio_filename = self.generate_and_upload_audio(response.agent_response)
# Store response with generated audio filename (just the filename, not the path) # Store response with generated audio filename
self.response_manager.store_response(response.agent_response, audio_filename) self.response_manager.store_response(response.agent_response, audio_filename)
print("Agent response with generated TTS audio stored successfully.") print("Agent response with generated TTS audio stored successfully.")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment