Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
e6df7c91
Commit
e6df7c91
authored
Sep 10, 2025
by
SalmaMohammedHamedMustafa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use gpt-4o-mini-tts instead of n8n
parent
0d5c27e2
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
119 additions
and
28 deletions
+119
-28
docker-compose.yml
self_hosted_env/docker-compose.yml
+4
-2
config.py
self_hosted_env/voice_agent/core/config.py
+8
-2
main.py
self_hosted_env/voice_agent/main.py
+1
-2
requirements.txt
self_hosted_env/voice_agent/requirements.txt
+1
-0
audio_service.py
self_hosted_env/voice_agent/services/audio_service.py
+10
-3
response_manager.py
self_hosted_env/voice_agent/services/response_manager.py
+6
-5
response_service.py
self_hosted_env/voice_agent/services/response_service.py
+4
-3
webhook_service.py
self_hosted_env/voice_agent/services/webhook_service.py
+85
-11
No files found.
self_hosted_env/docker-compose.yml
View file @
e6df7c91
...
@@ -61,16 +61,18 @@ services:
...
@@ -61,16 +61,18 @@ services:
-
minio
-
minio
voice-agent
:
voice-agent
:
build
:
./
always
# path to your Dockerfile folder
build
:
./
voice_agent
# path to your Dockerfile folder
container_name
:
voice-agent
container_name
:
voice-agent
ports
:
ports
:
-
"
8000:8000"
# Expose the FastAPI server
-
"
8000:8000"
# Expose the FastAPI server
restart
:
on-failure
restart
:
always
environment
:
environment
:
MINIO_ENDPOINT
:
"
http://minio:9000"
MINIO_ENDPOINT
:
"
http://minio:9000"
MINIO_ACCESS_KEY
:
"
${MINIO_ROOT_USER}"
MINIO_ACCESS_KEY
:
"
${MINIO_ROOT_USER}"
MINIO_SECRET_KEY
:
"
${MINIO_ROOT_PASSWORD}"
MINIO_SECRET_KEY
:
"
${MINIO_ROOT_PASSWORD}"
N8N_WEBHOOK_URL
:
"
${N8N_WEBHOOK_URL}"
N8N_WEBHOOK_URL
:
"
${N8N_WEBHOOK_URL}"
OPENAI_API_KEY
:
"
${OPENAI_API_KEY}"
MINIO_BUCKET
:
"
${MINIO_BUCKET}"
volumes
:
volumes
:
-
./uploads:/app/uploads
-
./uploads:/app/uploads
depends_on
:
depends_on
:
...
...
self_hosted_env/voice_agent/core/config.py
View file @
e6df7c91
import
os
import
os
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
dotenv
import
load_dotenv
# Load environment variables from .env file
load_dotenv
()
# Configuration Management
# Configuration Management
...
@@ -10,6 +14,7 @@ class AppConfig:
...
@@ -10,6 +14,7 @@ class AppConfig:
minio_secret_key
:
str
minio_secret_key
:
str
minio_bucket
:
str
minio_bucket
:
str
n8n_webhook_url
:
str
n8n_webhook_url
:
str
openai_api_key
:
str
@
classmethod
@
classmethod
def
from_env
(
cls
)
->
'AppConfig'
:
def
from_env
(
cls
)
->
'AppConfig'
:
...
@@ -17,6 +22,7 @@ class AppConfig:
...
@@ -17,6 +22,7 @@ class AppConfig:
minio_endpoint
=
os
.
getenv
(
"MINIO_ENDPOINT"
,
"http://minio:9000"
),
minio_endpoint
=
os
.
getenv
(
"MINIO_ENDPOINT"
,
"http://minio:9000"
),
minio_access_key
=
os
.
getenv
(
"MINIO_ACCESS_KEY"
),
minio_access_key
=
os
.
getenv
(
"MINIO_ACCESS_KEY"
),
minio_secret_key
=
os
.
getenv
(
"MINIO_SECRET_KEY"
),
minio_secret_key
=
os
.
getenv
(
"MINIO_SECRET_KEY"
),
minio_bucket
=
"coversation"
,
minio_bucket
=
os
.
getenv
(
"MINIO_BUCKET"
),
n8n_webhook_url
=
os
.
getenv
(
"N8N_WEBHOOK_URL"
)
n8n_webhook_url
=
os
.
getenv
(
"N8N_WEBHOOK_URL"
),
openai_api_key
=
os
.
getenv
(
"OPENAI_API_KEY"
)
)
)
\ No newline at end of file
self_hosted_env/voice_agent/main.py
View file @
e6df7c91
...
@@ -33,11 +33,10 @@ class DIContainer:
...
@@ -33,11 +33,10 @@ class DIContainer:
self
.
response_manager
=
ResponseManager
()
self
.
response_manager
=
ResponseManager
()
self
.
audio_service
=
AudioService
(
self
.
storage_repo
,
self
.
config
.
minio_bucket
)
self
.
audio_service
=
AudioService
(
self
.
storage_repo
,
self
.
config
.
minio_bucket
)
self
.
chat_service
=
ChatService
(
self
.
storage_repo
,
self
.
webhook_client
,
self
.
response_manager
,
self
.
config
)
self
.
chat_service
=
ChatService
(
self
.
storage_repo
,
self
.
webhook_client
,
self
.
response_manager
,
self
.
config
)
self
.
webhook_service
=
WebhookService
(
self
.
response_manager
,
self
.
audio_service
)
self
.
webhook_service
=
WebhookService
(
self
.
response_manager
,
self
.
storage_repo
,
self
.
config
.
minio_bucket
)
self
.
response_service
=
ResponseService
(
self
.
response_manager
,
self
.
audio_service
)
self
.
response_service
=
ResponseService
(
self
.
response_manager
,
self
.
audio_service
)
self
.
health_service
=
HealthService
(
self
.
storage_repo
,
self
.
config
)
self
.
health_service
=
HealthService
(
self
.
storage_repo
,
self
.
config
)
# FastAPI App Factory
# FastAPI App Factory
def
create_app
()
->
FastAPI
:
def
create_app
()
->
FastAPI
:
app
=
FastAPI
(
title
=
"Unified Chat API"
)
app
=
FastAPI
(
title
=
"Unified Chat API"
)
...
...
self_hosted_env/voice_agent/requirements.txt
View file @
e6df7c91
...
@@ -5,3 +5,4 @@ soundfile
...
@@ -5,3 +5,4 @@ soundfile
fastapi
fastapi
uvicorn[standard]
uvicorn[standard]
python-multipart
python-multipart
openai
self_hosted_env/voice_agent/services/audio_service.py
View file @
e6df7c91
...
@@ -17,14 +17,21 @@ class AudioService:
...
@@ -17,14 +17,21 @@ class AudioService:
def
get_audio_file
(
self
,
filename
:
str
)
->
str
:
def
get_audio_file
(
self
,
filename
:
str
)
->
str
:
try
:
try
:
# Add the audio/ prefix to match the MinIO structure
minio_file_path
=
f
"audio/{filename}"
print
(
f
"Attempting to download from MinIO: bucket={self.bucket}, path={minio_file_path}"
)
temp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp3"
)
temp_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
,
suffix
=
".mp3"
)
self
.
storage_repo
.
download_file
(
self
.
bucket
,
filename
,
temp_file
.
name
)
self
.
storage_repo
.
download_file
(
self
.
bucket
,
minio_file_path
,
temp_file
.
name
)
temp_file
.
close
()
temp_file
.
close
()
print
(
f
"Successfully downloaded audio file: {minio_file_path}"
)
return
temp_file
.
name
return
temp_file
.
name
except
ClientError
as
e
:
except
ClientError
as
e
:
print
(
f
"MinIO ClientError: {e}"
)
print
(
f
"MinIO ClientError: {e}"
)
print
(
f
"Failed to find: bucket={self.bucket}, path=audio/{filename}"
)
raise
HTTPException
(
status_code
=
404
,
detail
=
f
"Audio file '{filename}' not found."
)
raise
HTTPException
(
status_code
=
404
,
detail
=
f
"Audio file '{filename}' not found."
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
f
"An error occurred: {e}"
)
print
(
f
"An error occurred: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"An error occurred: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"An error occurred: {e}"
)
\ No newline at end of file
self_hosted_env/voice_agent/services/response_manager.py
View file @
e6df7c91
...
@@ -3,12 +3,13 @@ from typing import Optional
...
@@ -3,12 +3,13 @@ from typing import Optional
class
ResponseManager
:
class
ResponseManager
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
_path
"
:
None
,
"timestamp"
:
0
}
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
name
"
:
None
,
"timestamp"
:
0
}
def
store_response
(
self
,
text
:
str
,
audio_file_path
:
Optional
[
str
]
=
None
)
->
None
:
def
store_response
(
self
,
text
:
str
,
audio_filename
:
Optional
[
str
]
=
None
)
->
None
:
"""Store response with audio filename instead of file path"""
self
.
_latest_response
=
{
self
.
_latest_response
=
{
"text"
:
text
,
"text"
:
text
,
"audio_file
_path"
:
audio_file_path
,
"audio_file
name"
:
audio_filename
,
"timestamp"
:
time
.
time
()
"timestamp"
:
time
.
time
()
}
}
...
@@ -16,8 +17,8 @@ class ResponseManager:
...
@@ -16,8 +17,8 @@ class ResponseManager:
return
self
.
_latest_response
.
copy
()
return
self
.
_latest_response
.
copy
()
def
clear_response
(
self
)
->
None
:
def
clear_response
(
self
)
->
None
:
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
_path
"
:
None
,
"timestamp"
:
0
}
self
.
_latest_response
=
{
"text"
:
None
,
"audio_file
name
"
:
None
,
"timestamp"
:
0
}
def
is_response_fresh
(
self
,
max_age_seconds
:
int
=
300
)
->
bool
:
def
is_response_fresh
(
self
,
max_age_seconds
:
int
=
300
)
->
bool
:
return
(
self
.
_latest_response
[
"text"
]
and
return
(
self
.
_latest_response
[
"text"
]
and
(
time
.
time
()
-
self
.
_latest_response
[
"timestamp"
]
<
max_age_seconds
))
(
time
.
time
()
-
self
.
_latest_response
[
"timestamp"
]
<
max_age_seconds
))
\ No newline at end of file
self_hosted_env/voice_agent/services/response_service.py
View file @
e6df7c91
...
@@ -21,8 +21,9 @@ class ResponseService:
...
@@ -21,8 +21,9 @@ class ResponseService:
response_data
=
self
.
response_manager
.
get_response
()
response_data
=
self
.
response_manager
.
get_response
()
if
response_data
[
"audio_file_path"
]:
if
response_data
[
"audio_filename"
]:
file_path
=
response_data
[
"audio_file_path"
]
# Download audio file from MinIO using filename
file_path
=
self
.
audio_service
.
get_audio_file
(
response_data
[
"audio_filename"
])
response_text
=
response_data
[
"text"
]
response_text
=
response_data
[
"text"
]
self
.
response_manager
.
clear_response
()
self
.
response_manager
.
clear_response
()
...
@@ -44,4 +45,4 @@ class ResponseService:
...
@@ -44,4 +45,4 @@ class ResponseService:
"status"
:
ResponseStatus
.
SUCCESS
,
"status"
:
ResponseStatus
.
SUCCESS
,
"message"
:
"Text response available."
,
"message"
:
"Text response available."
,
"text"
:
response_text
"text"
:
response_text
}
}
\ No newline at end of file
self_hosted_env/voice_agent/services/webhook_service.py
View file @
e6df7c91
...
@@ -4,28 +4,102 @@ from typing import Optional
...
@@ -4,28 +4,102 @@ from typing import Optional
from
enum
import
Enum
from
enum
import
Enum
import
sys
import
sys
import
os
import
os
import
time
import
tempfile
from
openai
import
OpenAI
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
)))
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
)))
from
core
import
ResponseStatus
from
core
import
ResponseStatus
from
schemas
import
WebhookResponse
from
schemas
import
WebhookResponse
from
services.response_manager
import
ResponseManager
from
services.response_manager
import
ResponseManager
from
services.audio_service
import
AudioService
from
repositories
import
StorageRepository
class
WebhookService
:
class
WebhookService
:
def
__init__
(
self
,
response_manager
:
ResponseManager
,
audio_service
:
AudioService
):
def
__init__
(
self
,
response_manager
:
ResponseManager
,
storage_repo
:
StorageRepository
,
bucket
:
str
):
self
.
response_manager
=
response_manager
self
.
response_manager
=
response_manager
self
.
audio_service
=
audio_service
self
.
storage_repo
=
storage_repo
self
.
bucket
=
bucket
# Initialize OpenAI client
openai_api_key
=
os
.
getenv
(
"OPENAI_API_KEY"
)
if
not
openai_api_key
:
print
(
"Warning: OPENAI_API_KEY not found. TTS functionality will be disabled."
)
self
.
openai_client
=
None
else
:
self
.
openai_client
=
OpenAI
(
api_key
=
openai_api_key
)
def generate_audio_from_text(self, text: str) -> str:
    """Generate speech for *text* with OpenAI TTS and upload it to MinIO.

    The audio is streamed into a local temporary ``.mp3`` file, uploaded to
    ``self.bucket`` under the ``audio/`` prefix, and the temporary file is
    removed again whether or not the operation succeeded.

    Args:
        text: The text to synthesize.

    Returns:
        The bare object filename (without the ``audio/`` prefix); the audio
        service adds the prefix itself when downloading.

    Raises:
        HTTPException: 500 when no OpenAI client is configured, or when
            audio generation or the upload fails.
    """
    # Local import: only needed here, to build a collision-free object name.
    import uuid

    if not self.openai_client:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    temp_file_path = None
    try:
        # Reserve a path only; the handle is closed right away so the TTS
        # response can write to the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_file_path = temp_file.name

        print(f"Generating TTS audio for text: {text[:50]}...")

        # Generate audio using OpenAI TTS
        with self.openai_client.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice="alloy",  # Available voices: alloy, echo, fable, onyx, nova, shimmer
            input=text,
            response_format="mp3",
        ) as response:
            response.stream_to_file(temp_file_path)

        # A second-resolution timestamp alone collides when two responses are
        # generated within the same second, so a random suffix is appended.
        timestamp = int(time.time())
        filename = f"tts_response_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"

        # The full path in MinIO will be audio/filename
        minio_file_path = f"audio/{filename}"
        print(f"Uploading generated audio to MinIO: {minio_file_path}")

        # Upload to MinIO
        with open(temp_file_path, "rb") as audio_file:
            self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)

        print(f"Successfully generated and uploaded TTS audio: {filename}")

        # Return just the filename, the audio service will add the audio/ prefix
        return filename
    except Exception as e:
        print(f"Error generating TTS audio: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to generate audio: {str(e)}"
        ) from e
    finally:
        # Single cleanup point for both the success and the error path.
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
            print(f"Cleaned up temporary file: {temp_file_path}")
def
process_webhook_response
(
self
,
response
:
WebhookResponse
)
->
dict
:
def
process_webhook_response
(
self
,
response
:
WebhookResponse
)
->
dict
:
"""Process webhook response from n8n and generate TTS audio"""
print
(
"Received webhook data from n8n:"
,
response
.
dict
())
print
(
"Received webhook data from n8n:"
,
response
.
dict
())
if
response
.
agent_responded
==
'yes'
and
response
.
agent_response
:
if
response
.
agent_responded
==
'yes'
and
response
.
agent_response
:
if
response
.
filename
:
try
:
audio_file_path
=
self
.
audio_service
.
get_audio_file
(
response
.
filename
)
# Generate audio from the text response
self
.
response_manager
.
store_response
(
response
.
agent_response
,
audio_file_path
)
audio_filename
=
self
.
generate_audio_from_text
(
response
.
agent_response
)
print
(
"Agent response stored successfully."
)
else
:
# Store response with generated audio filename (just the filename, not the path)
self
.
response_manager
.
store_response
(
response
.
agent_response
,
audio_filename
)
print
(
"Agent response with generated TTS audio stored successfully."
)
except
HTTPException
:
# Re-raise HTTP exceptions
raise
except
Exception
as
e
:
print
(
f
"Error generating TTS audio, storing text-only response: {e}"
)
# Fallback to text-only response if audio generation fails
self
.
response_manager
.
store_response
(
response
.
agent_response
)
self
.
response_manager
.
store_response
(
response
.
agent_response
)
print
(
"Agent text response stored successfully."
)
print
(
"Stored text-only response as fallback."
)
else
:
print
(
"No valid agent response received from webhook."
)
return
{
"status"
:
ResponseStatus
.
SUCCESS
,
"message"
:
"Webhook received and processed successfully."
}
return
{
"status"
:
ResponseStatus
.
SUCCESS
,
"message"
:
"Webhook received and processed successfully."
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment