Salma Mohammed Hamed / AI Tutor / Commits

Commit b9438084, authored Sep 11, 2025 by SalmaMohammedHamedMustafa
Parent: ade545c7

Commit message: whisper in backend and open ai service class

Showing 7 changed files with 237 additions and 68 deletions (+237 -68)
self_hosted_env/voice_agent/Dockerfile                    +2    -2
self_hosted_env/voice_agent/handlers/audio_handler.py     +65   -11
self_hosted_env/voice_agent/main.py                       +19   -4
self_hosted_env/voice_agent/services/__init__.py          +1    -0
self_hosted_env/voice_agent/services/chat_service.py      +12   -5
self_hosted_env/voice_agent/services/openai_service.py    +123  -0   (new file)
self_hosted_env/voice_agent/services/webhook_service.py   +15   -46
self_hosted_env/voice_agent/Dockerfile @ b9438084

The container no longer launches the app on start; the CMD is replaced with an idle loop so the container stays up and the service can be started manually inside it.

@@ -7,5 +7,5 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
-CMD ["python", "main.py"]
+#keep the container running always
+CMD ["sh", "-c", "while true; do sleep 30; done"]
self_hosted_env/voice_agent/handlers/audio_handler.py @ b9438084

AudioMessageHandler now takes an OpenAIService and transcribes uploads with Whisper before forwarding them to n8n. Previously it only uploaded the raw file to MinIO and sent the filename in the webhook payload; the new handle() reads the upload into memory, rejects empty files, backs the original up to MinIO, sends the transcription to n8n, and falls back to sending the filename plus the error if transcription fails. The new version of the changed code:

from fastapi import UploadFile, HTTPException
from abc import ABC, abstractmethod
import sys
import io

sys.path.append("../")
from repositories import WebhookClient, StorageRepository
from core import MessageType, ResponseStatus
from services import OpenAIService


class MessageHandler(ABC):
    # ... (unchanged lines collapsed in the diff view)
    def handle(self, **kwargs) -> dict:
        pass


class AudioMessageHandler():
    def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient,
                 bucket: str, openai_service: OpenAIService):
        self.storage_repo = storage_repo
        self.webhook_client = webhook_client
        self.bucket = bucket
        self.openai_service = openai_service

    def handle(self, file: UploadFile, **kwargs) -> dict:
        try:
            # Read file content
            file.file.seek(0)
            file_content = file.file.read()
            if not file_content:
                raise HTTPException(status_code=400, detail="Empty audio file received")

            # Upload original file to MinIO for backup
            file_path = f"audio/{file.filename}"
            file_stream = io.BytesIO(file_content)
            self.storage_repo.upload_file(file_stream, self.bucket, file_path)
            print(f"Uploaded {file.filename} to MinIO at {file_path}")

            # Try to transcribe the audio using OpenAI service
            try:
                transcribed_text = self.openai_service.transcribe_audio(file_content, file.filename)

                # Send transcribed text to n8n
                payload = {
                    "type": MessageType.AUDIO,
                    "message": transcribed_text,
                }
                self.webhook_client.send_webhook(payload)
                print(f"Sent transcribed text to n8n: {transcribed_text[:100]}...")

                return {
                    "status": ResponseStatus.SUCCESS,
                    "message": "Audio transcribed and forwarded to n8n.",
                    "transcription": transcribed_text
                }
            except Exception as transcription_error:
                print(f"Transcription failed: {transcription_error}")

                # Fallback: send filename to n8n for processing there
                payload = {
                    "type": MessageType.AUDIO,
                    "filename": file.filename,
                    # Arabic: "A voice message was recorded - local transcription failed"
                    "message": "تم تسجيل رسالة صوتية - فشل في التفريغ المحلي",
                    "transcription_source": "fallback",
                    "error": str(transcription_error)
                }
                self.webhook_client.send_webhook(payload)
                print(f"Sent filename to n8n as fallback: {file.filename}")

                return {
                    "status": ResponseStatus.SUCCESS,
                    "message": "Audio uploaded, transcription failed - sent to n8n for processing.",
                    "filename": file.filename,
                    "transcription_error": str(transcription_error)
                }
        except Exception as e:
            print(f"Error processing audio message: {e}")
            raise HTTPException(status_code=500, detail=f"Failed to process audio message: {str(e)}")
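A small driving sketch for the new handler, run from the voice_agent directory so the local packages resolve. FakeStorage, FakeWebhook, the bucket name, and the sample bytes are hypothetical stand-ins; without real audio (or without an API key) the call exercises the fallback branch and still returns SUCCESS.

import io
from fastapi import UploadFile
from handlers import AudioMessageHandler
from services import OpenAIService

class FakeStorage:
    # Matches the call shape handle() uses: upload_file(fileobj, bucket, path)
    def upload_file(self, fileobj, bucket, path):
        print(f"[fake] upload {path} -> bucket {bucket}")

class FakeWebhook:
    # Matches the call shape handle() uses: send_webhook(payload)
    def send_webhook(self, payload):
        print(f"[fake] POST to n8n: {payload}")

handler = AudioMessageHandler(FakeStorage(), FakeWebhook(), "voice-agent", OpenAIService())
upload = UploadFile(file=io.BytesIO(b"not real audio"), filename="note.ogg")
print(handler.handle(file=upload))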
self_hosted_env/voice_agent/main.py @ b9438084

The dependency container now builds a single OpenAIService and injects it into ChatService and WebhookService (previously both were constructed without it). In the hunk following "from schemas import WebhookResponse, TextMessage", the new wiring reads:

from repositories import StorageRepository, MinIOStorageRepository
from repositories import WebhookClient, N8NWebhookClient
from handlers import AudioMessageHandler, TextMessageHandler
from services import AudioService, ChatService, HealthService, ResponseService, ResponseManager, WebhookService, OpenAIService


# Dependency Container
class DIContainer:
    def __init__(self):
        self.config = AppConfig.from_env()
        self.storage_repo = MinIOStorageRepository(self.config)
        self.webhook_client = N8NWebhookClient(self.config.n8n_webhook_url)
        self.response_manager = ResponseManager()

        # Initialize OpenAI service
        self.openai_service = OpenAIService()

        # Updated services to use OpenAI service
        self.audio_service = AudioService(self.storage_repo, self.config.minio_bucket)
        self.chat_service = ChatService(
            self.storage_repo,
            self.webhook_client,
            self.response_manager,
            self.config,
            self.openai_service  # Pass OpenAI service
        )
        self.webhook_service = WebhookService(
            self.response_manager,
            self.storage_repo,
            self.config.minio_bucket,
            self.openai_service  # Pass OpenAI service
        )
        self.response_service = ResponseService(self.response_manager, self.audio_service)
        self.health_service = HealthService(self.storage_repo, self.config)
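A quick sanity check of the container wiring; it assumes the environment variables that AppConfig.from_env() reads (MinIO, n8n webhook URL, OPENAI_API_KEY) are already set and that importing main has no side effects beyond building the app.

from main import DIContainer

container = DIContainer()
print("OpenAI available:", container.openai_service.is_available())
print("Chat handlers:", list(container.chat_service.handlers.keys()))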
self_hosted_env/voice_agent/services/__init__.py @ b9438084

@@ -4,3 +4,4 @@ from .health_service import HealthService
 from .response_service import ResponseService
 from .response_manager import ResponseManager
 from .webhook_service import WebhookService
+from .openai_service import OpenAIService
self_hosted_env/voice_agent/services/chat_service.py @ b9438084

ChatService now accepts an OpenAIService and passes it on to AudioMessageHandler; the handlers import moves from module level into __init__. In the hunk following "import os", the new version reads:

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from core import MessageType, AppConfig
from repositories import StorageRepository, WebhookClient
from services.response_manager import ResponseManager
from services.openai_service import OpenAIService


class ChatService:
    def __init__(self, storage_repo: StorageRepository, webhook_client: WebhookClient,
                 response_manager: ResponseManager, config: AppConfig,
                 openai_service: OpenAIService):
        from handlers import AudioMessageHandler, TextMessageHandler
        self.storage_repo = storage_repo
        self.webhook_client = webhook_client
        self.response_manager = response_manager
        self.config = config
        self.openai_service = openai_service

        # Message handlers with OpenAI service dependency
        self.handlers = {
            MessageType.AUDIO: AudioMessageHandler(
                storage_repo,
                webhook_client,
                config.minio_bucket,
                openai_service  # Pass OpenAI service
            ),
            MessageType.TEXT: TextMessageHandler(webhook_client)
        }

The dispatch tail (second hunk, @@ -31,4 +38,4 @@) is unchanged:

        elif text:
            return self.handlers[MessageType.TEXT].handle(text=text)
        else:
            raise HTTPException(status_code=400, detail="No text or audio file provided.")
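Outside the container, the updated constructor can be exercised with stand-ins. FakeStorage, FakeWebhook, and the SimpleNamespace config (carrying only the minio_bucket attribute the constructor reads) are hypothetical.

from types import SimpleNamespace
from services import ChatService, ResponseManager, OpenAIService

class FakeStorage:
    def upload_file(self, fileobj, bucket, path): ...

class FakeWebhook:
    def send_webhook(self, payload): ...

config = SimpleNamespace(minio_bucket="voice-agent")
chat = ChatService(FakeStorage(), FakeWebhook(), ResponseManager(), config, OpenAIService())
print(list(chat.handlers.keys()))  # [MessageType.AUDIO, MessageType.TEXT]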
self_hosted_env/voice_agent/services/openai_service.py @ b9438084 (new file, mode 100644)

import os
import time
import tempfile
import io
from typing import Optional
from fastapi import HTTPException
from openai import OpenAI


class OpenAIService:
    """Service class for handling OpenAI API operations (TTS and Whisper)"""

    def __init__(self):
        self.api_key = os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            print("Warning: OPENAI_API_KEY not found. OpenAI services will be disabled.")
            self.client = None
        else:
            self.client = OpenAI(api_key=self.api_key)

    def is_available(self) -> bool:
        """Check if OpenAI service is available"""
        return self.client is not None

    def transcribe_audio(self, file_content: bytes, filename: str, language: Optional[str] = "ar") -> str:
        """
        Transcribe audio using OpenAI Whisper

        Args:
            file_content: Audio file content as bytes
            filename: Original filename for context
            language: Language code (optional, defaults to Arabic)

        Returns:
            Transcribed text

        Raises:
            HTTPException: If transcription fails or service unavailable
        """
        if not self.is_available():
            raise HTTPException(status_code=500, detail="OpenAI service not available")

        try:
            # Create file-like object for the API
            audio_file = io.BytesIO(file_content)
            audio_file.name = filename

            print(f"Transcribing audio: {filename}")

            # Call Whisper API
            transcript = self.client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language=language if language else None  # Auto-detect if None
            )

            transcribed_text = transcript.text.strip()
            if not transcribed_text:
                raise ValueError("Empty transcription result")

            print(f"Transcription successful: {transcribed_text[:100]}...")
            return transcribed_text

        except Exception as e:
            print(f"Error during transcription: {e}")
            raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

    def generate_speech(self, text: str, voice: str = "alloy", model: str = "tts-1") -> str:
        """
        Generate speech from text using OpenAI TTS

        Args:
            text: Text to convert to speech
            voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
            model: TTS model to use (tts-1 or tts-1-hd)

        Returns:
            Path to temporary file containing the generated audio

        Raises:
            HTTPException: If TTS generation fails or service unavailable
        """
        if not self.is_available():
            raise HTTPException(status_code=500, detail="OpenAI service not available")

        temp_file_path = None
        try:
            # Create temporary file
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
            temp_file_path = temp_file.name
            temp_file.close()

            print(f"Generating TTS audio: {text[:50]}...")

            # Generate audio using OpenAI TTS
            with self.client.audio.speech.with_streaming_response.create(
                model=model,
                voice=voice,
                input=text,
                response_format="mp3"
            ) as response:
                response.stream_to_file(temp_file_path)

            print(f"TTS generation successful, saved to: {temp_file_path}")
            return temp_file_path

        except Exception as e:
            # Clean up temp file on error
            if temp_file_path and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)
            print(f"Error during TTS generation: {e}")
            raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")

    def cleanup_temp_file(self, file_path: str) -> None:
        """Clean up temporary file"""
        if file_path and os.path.exists(file_path):
            try:
                os.unlink(file_path)
                print(f"Cleaned up temporary file: {file_path}")
            except Exception as e:
                print(f"Warning: Could not clean up temp file {file_path}: {e}")
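A minimal usage sketch for the new service, assuming OPENAI_API_KEY is set and that sample.mp3 is a real recording (the filename and the test sentence are placeholders):

from services.openai_service import OpenAIService

svc = OpenAIService()
if svc.is_available():
    # Whisper: bytes in, text out (language defaults to "ar")
    with open("sample.mp3", "rb") as f:
        print(svc.transcribe_audio(f.read(), "sample.mp3"))

    # TTS: text in, path to a temporary .mp3 out; the caller cleans up
    path = svc.generate_speech("Hello from the voice agent")
    print("TTS saved to", path)
    svc.cleanup_temp_file(path)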
self_hosted_env/voice_agent/services/webhook_service.py @ b9438084

WebhookService now receives the shared OpenAIService. The inline OpenAI client, tempfile handling, and streaming TTS call (model "gpt-4o-mini-tts") are removed, and generate_audio_from_text is replaced by generate_and_upload_audio, which delegates speech synthesis and temp-file cleanup to the service and returns just the filename (the audio service adds the audio/ prefix). The new version of the changed code:

from fastapi import HTTPException
from pydantic import BaseModel
from typing import Optional
import sys
import os
import time

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from core import ResponseStatus
from schemas import WebhookResponse
from services.response_manager import ResponseManager
from services.openai_service import OpenAIService
from repositories import StorageRepository


class WebhookService:
    def __init__(self, response_manager: ResponseManager, storage_repo: StorageRepository,
                 bucket: str, openai_service: OpenAIService):
        self.response_manager = response_manager
        self.storage_repo = storage_repo
        self.bucket = bucket
        self.openai_service = openai_service

    def generate_and_upload_audio(self, text: str) -> str:
        """Generate audio from text and upload to MinIO, return filename"""
        try:
            # Generate audio using OpenAI service
            temp_file_path = self.openai_service.generate_speech(text)

            # Generate unique filename for MinIO
            timestamp = int(time.time())
            filename = f"tts_response_{timestamp}.mp3"
            minio_file_path = f"audio/{filename}"

            print(f"Uploading generated audio to MinIO: {minio_file_path}")
            # ... (unchanged lines collapsed in the diff view open the temp file as audio_file)
            self.storage_repo.upload_file(audio_file, self.bucket, minio_file_path)

            # Clean up temporary file
            self.openai_service.cleanup_temp_file(temp_file_path)

            print(f"Successfully generated and uploaded TTS audio: {filename}")
            return filename

        except Exception as e:
            print(f"Error generating and uploading audio: {e}")
            raise HTTPException(status_code=500, detail=f"Failed to generate audio: {str(e)}")

    def process_webhook_response(self, response: WebhookResponse) -> dict:
        # ... (unchanged lines collapsed in the diff view)
        if response.agent_responded == 'yes' and response.agent_response:
            try:
                # Generate audio from the text response using OpenAI service
                audio_filename = self.generate_and_upload_audio(response.agent_response)

                # Store response with generated audio filename
                self.response_manager.store_response(response.agent_response, audio_filename)
                print("Agent response with generated TTS audio stored successfully.")
                # ...
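A sketch of the new TTS round trip; FakeStorage and the bucket name are stand-ins, and OPENAI_API_KEY must be set for generate_speech to succeed.

from services import ResponseManager, OpenAIService, WebhookService

class FakeStorage:
    def upload_file(self, fileobj, bucket, path):
        print(f"[fake] upload {path} -> bucket {bucket}")

svc = WebhookService(ResponseManager(), FakeStorage(), "voice-agent", OpenAIService())
filename = svc.generate_and_upload_audio("Hello from the voice agent")
print("stored as", filename)  # e.g. tts_response_<timestamp>.mp3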