Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
e6df7c91
Commit
e6df7c91
authored
Sep 10, 2025
by
SalmaMohammedHamedMustafa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use gpt-4o-mini-tts instead of n8n
parent
0d5c27e2
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
119 additions
and
28 deletions
+119
-28
docker-compose.yml
self_hosted_env/docker-compose.yml
+4
-2
config.py
self_hosted_env/voice_agent/core/config.py
+8
-2
main.py
self_hosted_env/voice_agent/main.py
+1
-2
requirements.txt
self_hosted_env/voice_agent/requirements.txt
+1
-0
audio_service.py
self_hosted_env/voice_agent/services/audio_service.py
+10
-3
response_manager.py
self_hosted_env/voice_agent/services/response_manager.py
+6
-5
response_service.py
self_hosted_env/voice_agent/services/response_service.py
+4
-3
webhook_service.py
self_hosted_env/voice_agent/services/webhook_service.py
+85
-11
No files found.
self_hosted_env/docker-compose.yml
View file @
e6df7c91
...
...
@@ -61,16 +61,18 @@ services:
-
minio
voice-agent
:
build
:
./
always
# path to your Dockerfile folder
build
:
./
voice_agent
# path to your Dockerfile folder
container_name
:
voice-agent
ports
:
-
"
8000:8000"
# Expose the FastAPI server
restart
:
on-failure
restart
:
always
environment
:
MINIO_ENDPOINT
:
"
http://minio:9000"
MINIO_ACCESS_KEY
:
"
${MINIO_ROOT_USER}"
MINIO_SECRET_KEY
:
"
${MINIO_ROOT_PASSWORD}"
N8N_WEBHOOK_URL
:
"
${N8N_WEBHOOK_URL}"
OPENAI_API_KEY
:
"
${OPENAI_API_KEY}"
MINIO_BUCKET
:
"
${MINIO_BUCKET}"
volumes
:
-
./uploads:/app/uploads
depends_on
:
...
...
self_hosted_env/voice_agent/core/config.py
View file @
e6df7c91
import
os
from
dataclasses
import
dataclass
from
dotenv
import
load_dotenv
# Load environment variables from .env file
load_dotenv
()
# Configuration Management
...
...
@@ -10,6 +14,7 @@ class AppConfig:
minio_secret_key
:
str
minio_bucket
:
str
n8n_webhook_url
:
str
openai_api_key
:
str
@
classmethod
def
from_env
(
cls
)
->
'AppConfig'
:
...
...
@@ -17,6 +22,7 @@ class AppConfig:
minio_endpoint
=
os
.
getenv
(
"MINIO_ENDPOINT"
,
"http://minio:9000"
),
minio_access_key
=
os
.
getenv
(
"MINIO_ACCESS_KEY"
),
minio_secret_key
=
os
.
getenv
(
"MINIO_SECRET_KEY"
),
minio_bucket
=
"coversation"
,
n8n_webhook_url
=
os
.
getenv
(
"N8N_WEBHOOK_URL"
)
minio_bucket
=
os
.
getenv
(
"MINIO_BUCKET"
),
n8n_webhook_url
=
os
.
getenv
(
"N8N_WEBHOOK_URL"
),
openai_api_key
=
os
.
getenv
(
"OPENAI_API_KEY"
)
)
\ No newline at end of file
self_hosted_env/voice_agent/main.py
View file @
e6df7c91
...
...
@@ -33,11 +33,10 @@ class DIContainer:
self
.
response_manager
=
ResponseManager
()
self
.
audio_service
=
AudioService
(
self
.
storage_repo
,
self
.
config
.
minio_bucket
)
self
.
chat_service
=
ChatService
(
self
.
storage_repo
,
self
.
webhook_client
,
self
.
response_manager
,
self
.
config
)
self
.
webhook_service
=
WebhookService
(
self
.
response_manager
,
self
.
audio_service
)
self
.
webhook_service
=
WebhookService
(
self
.
response_manager
,
self
.
storage_repo
,
self
.
config
.
minio_bucket
)
self
.
response_service
=
ResponseService
(
self
.
response_manager
,
self
.
audio_service
)
self
.
health_service
=
HealthService
(
self
.
storage_repo
,
self
.
config
)
# FastAPI App Factory
def
create_app
()
->
FastAPI
:
app
=
FastAPI
(
title
=
"Unified Chat API"
)
...
...
self_hosted_env/voice_agent/requirements.txt
View file @
e6df7c91
...
...
@@ -5,3 +5,4 @@ soundfile
fastapi
uvicorn[standard]
python-multipart
openai
self_hosted_env/voice_agent/services/audio_service.py
View file @
e6df7c91
...
...
@@ -17,14 +17,21 @@ class AudioService:
def get_audio_file(self, filename: str) -> str:
    """Download an audio object from storage into a local temporary file.

    The object is looked up under the ``audio/`` prefix of ``self.bucket``.

    Args:
        filename: Object name without the ``audio/`` prefix.

    Returns:
        Path of a temporary ``.mp3`` file holding the downloaded audio.
        The file is created with ``delete=False``, so the caller is
        responsible for removing it.

    Raises:
        HTTPException: 404 when the storage client reports a ``ClientError``,
            500 for any other failure.
    """
    # Local import: the module's import block is outside the visible hunk.
    import os

    # Add the audio/ prefix to match the MinIO structure
    minio_file_path = f"audio/{filename}"
    temp_path = None
    downloaded = False
    try:
        print(f"Attempting to download from MinIO: bucket={self.bucket}, path={minio_file_path}")

        # Only the path is needed; close the handle right away so the
        # download is the sole writer of the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_path = temp_file.name

        self.storage_repo.download_file(self.bucket, minio_file_path, temp_path)
        print(f"Successfully downloaded audio file: {minio_file_path}")
        downloaded = True
        return temp_path
    except ClientError as e:
        print(f"MinIO ClientError: {e}")
        print(f"Failed to find: bucket={self.bucket}, path=audio/{filename}")
        raise HTTPException(status_code=404, detail=f"Audio file '{filename}' not found.") from e
    except Exception as e:
        print(f"An error occurred: {e}")
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}") from e
    finally:
        # A failed download must not leave an orphaned temp file behind.
        if not downloaded and temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
\ No newline at end of file
self_hosted_env/voice_agent/services/response_manager.py
View file @
e6df7c91
...
...
@@ -3,12 +3,13 @@ from typing import Optional
class
ResponseManager
:
def
__init__
(
self
):
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
_path
"
:
None
,
"timestamp"
:
0
}
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
name
"
:
None
,
"timestamp"
:
0
}
def
store_response
(
self
,
text
:
str
,
audio_file_path
:
Optional
[
str
]
=
None
)
->
None
:
def
store_response
(
self
,
text
:
str
,
audio_filename
:
Optional
[
str
]
=
None
)
->
None
:
"""Store response with audio filename instead of file path"""
self
.
_latest_response
=
{
"text"
:
text
,
"audio_file
_path"
:
audio_file_path
,
"audio_file
name"
:
audio_filename
,
"timestamp"
:
time
.
time
()
}
...
...
@@ -16,8 +17,8 @@ class ResponseManager:
return
self
.
_latest_response
.
copy
()
def
clear_response
(
self
)
->
None
:
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
_path
"
:
None
,
"timestamp"
:
0
}
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
name
"
:
None
,
"timestamp"
:
0
}
def is_response_fresh(self, max_age_seconds: int = 300) -> bool:
    """Report whether a stored response exists and is recent enough.

    Args:
        max_age_seconds: Maximum allowed age, in seconds, measured from the
            stored ``timestamp`` to now.

    Returns:
        ``True`` only when the stored text is non-empty and was stored less
        than ``max_age_seconds`` ago; ``False`` otherwise. The result is
        always a real ``bool`` (never ``None`` or an empty string).
    """
    latest = self._latest_response
    # Guard first so a missing/empty text yields False rather than leaking
    # the falsy value itself out of an ``and`` expression.
    if not latest["text"]:
        return False
    return (time.time() - latest["timestamp"]) < max_age_seconds
\ No newline at end of file
self_hosted_env/voice_agent/services/response_service.py
View file @
e6df7c91
...
...
@@ -21,8 +21,9 @@ class ResponseService:
response_data
=
self
.
response_manager
.
get_response
()
if
response_data
[
"audio_file_path"
]:
file_path
=
response_data
[
"audio_file_path"
]
if
response_data
[
"audio_filename"
]:
# Download audio file from MinIO using filename
file_path
=
self
.
audio_service
.
get_audio_file
(
response_data
[
"audio_filename"
])
response_text
=
response_data
[
"text"
]
self
.
response_manager
.
clear_response
()
...
...
@@ -44,4 +45,4 @@ class ResponseService:
"status"
:
ResponseStatus
.
SUCCESS
,
"message"
:
"Text response available."
,
"text"
:
response_text
}
}
\ No newline at end of file
self_hosted_env/voice_agent/services/webhook_service.py
View file @
e6df7c91
...
...
@@ -4,28 +4,102 @@ from typing import Optional
from
enum
import
Enum
import
sys
import
os
import
time
import
tempfile
from
openai
import
OpenAI
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
)))
from
core
import
ResponseStatus
from
schemas
import
WebhookResponse
from
services.response_manager
import
ResponseManager
from
services.audio_service
import
AudioService
from
repositories
import
StorageRepository
class WebhookService:
    """Handle n8n webhook replies: synthesize speech and store the response.

    The agent's text is converted to MP3 with OpenAI TTS, uploaded to object
    storage under the ``audio/`` prefix, and recorded in the response manager
    together with the uploaded file name.
    """

    def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository, bucket: str):
        """Wire collaborators and create the OpenAI client if a key is set.

        Args:
            response_manager: Store for the latest agent response.
            storage_repo: Object-storage repository used for uploads.
            bucket: Name of the bucket that receives generated audio.
        """
        self.response_manager = response_manager
        self.storage_repo = storage_repo
        self.bucket = bucket

        # Initialize OpenAI client
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            print("Warning: OPENAI_API_KEY not found. TTS functionality will be disabled.")
            self.openai_client = None
        else:
            self.openai_client = OpenAI(api_key=openai_api_key)

    def generate_audio_from_text(self, text: str) -> str:
        """Generate audio from text using OpenAI TTS and upload to MinIO.

        Args:
            text: Text to synthesize.

        Returns:
            The uploaded file name only (without the ``audio/`` prefix).

        Raises:
            HTTPException: 500 when no OpenAI client is configured or when
                synthesis or upload fails.
        """
        if not self.openai_client:
            raise HTTPException(status_code=500, detail="OpenAI API key not configured")

        # Local import keeps this block self-contained.
        import uuid

        temp_file_path = None
        try:
            # Reserve a temp path; the handle is closed immediately because
            # the TTS stream writes to the path itself.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_file_path = temp_file.name

            print(f"Generating TTS audio for text: {text[:50]}...")

            # Generate audio using OpenAI TTS
            with self.openai_client.audio.speech.with_streaming_response.create(
                model="gpt-4o-mini-tts",
                voice="alloy",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
                input=text,
                response_format="mp3",
            ) as response:
                response.stream_to_file(temp_file_path)

            # A one-second timestamp alone collides when two replies arrive in
            # the same second; the random suffix keeps every object distinct.
            timestamp = int(time.time())
            filename = f"tts_response_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"

            # The full path in MinIO will be audio/filename
            minio_file_path = f"audio/{filename}"
            print(f"Uploading generated audio to MinIO: {minio_file_path}")

            # Upload to MinIO
            with open(temp_file_path, 'rb') as audio_file:
                self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)

            print(f"Successfully generated and uploaded TTS audio: {filename}")

            # Return just the filename, the audio service will add the audio/ prefix
            return filename
        except Exception as e:
            print(f"Error generating TTS audio: {e}")
            raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}") from e
        finally:
            # Runs on success and on failure, so the temp file never leaks.
            if temp_file_path and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)
                print(f"Cleaned up temporary file: {temp_file_path}")

    def process_webhook_response(self, response: WebhookResponse) -> dict:
        """Process webhook response from n8n and generate TTS audio.

        When the agent responded with text, the text is stored together with
        freshly generated audio. If audio generation fails for any reason
        (including a missing OpenAI key), the text is still stored on its own
        so the reply is not lost.

        Args:
            response: Parsed webhook payload; ``agent_responded`` and
                ``agent_response`` are read.

        Returns:
            A dict with ``status`` and ``message`` acknowledging the webhook.
        """
        print("Received webhook data from n8n:", response.dict())

        if response.agent_responded == 'yes' and response.agent_response:
            audio_filename = None
            try:
                # Generate audio from the text response
                audio_filename = self.generate_audio_from_text(response.agent_response)
            except Exception as e:
                # generate_audio_from_text reports every failure as an
                # HTTPException, so it must be caught here for the text-only
                # fallback to be reachable at all.
                print(f"Error generating TTS audio, storing text-only response: {e}")

            if audio_filename:
                # Store response with generated audio filename (just the filename, not the path)
                self.response_manager.store_response(response.agent_response, audio_filename)
                print("Agent response with generated TTS audio stored successfully.")
            else:
                # Fallback to text-only response if audio generation fails
                self.response_manager.store_response(response.agent_response)
                print("Stored text-only response as fallback.")
        else:
            print("No valid agent response received from webhook.")

        return {"status": ResponseStatus.SUCCESS, "message": "Webhook received and processed successfully."}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment