Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
3bc37f6b
Commit 3bc37f6b authored Nov 25, 2025 by salma
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use the new RVC pipeline
parent
946afbe7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing 6 changed files with 37 additions and 69 deletions
+37
-69
agent_prompts.py
...d_env/voice_agent/services/agent_helpers/agent_prompts.py
+6
-1
agent_service.py
self_hosted_env/voice_agent/services/agent_service.py
+1
-1
chat_service.py
self_hosted_env/voice_agent/services/chat_service.py
+1
-2
openai_service.py
self_hosted_env/voice_agent/services/openai_service.py
+2
-11
base_tts_service.py
self_hosted_env/voice_agent/services/tts/base_tts_service.py
+1
-8
custom_tts_service.py
...hosted_env/voice_agent/services/tts/custom_tts_service.py
+26
-46
No files found.
self_hosted_env/voice_agent/services/agent_helpers/agent_prompts.py
View file @
3bc37f6b
...
@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
...
@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Egyptian + Arabic ----------
# ---------- Egyptian + Arabic ----------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ARABIC
):
"""
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ARABIC
):
"""
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
رُد بالنصّ الثابت ده:
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
...
@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Saudi + Arabic ----------
# ---------- Saudi + Arabic ----------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ARABIC
):
"""
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ARABIC
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنص الثابت هذا:
رُد بالنص الثابت هذا:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
...
@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Egyptian English --------
# -------- Egyptian English --------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ENGLISH
):
"""
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
رُد بالنصّ الثابت ده:
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
...
@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
...
@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Saudi English --------
# -------- Saudi English --------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ENGLISH
):
"""
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنصّ الثابت هذا:
رُد بالنصّ الثابت هذا:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
...
self_hosted_env/voice_agent/services/agent_service.py
View file @
3bc37f6b
...
@@ -83,7 +83,7 @@ class AgentService:
...
@@ -83,7 +83,7 @@ class AgentService:
# If we reach here, it means the response is a normal text string.
# If we reach here, it means the response is a normal text string.
# Now it is safe to apply text-based fixes.
# Now it is safe to apply text-based fixes.
response
=
apply_fixes
(
response
,
custom_fixes
)
#
response = apply_fixes(response, custom_fixes)
# response = self.tashkeel_agent.apply_tashkeel(response)
# response = self.tashkeel_agent.apply_tashkeel(response)
print
(
f
"response: {response}"
)
print
(
f
"response: {response}"
)
...
...
self_hosted_env/voice_agent/services/chat_service.py
View file @
3bc37f6b
...
@@ -133,8 +133,7 @@ class ChatService:
...
@@ -133,8 +133,7 @@ class ChatService:
def
_generate_and_upload_audio
(
self
,
text
:
str
,
student_id
:
str
)
->
dict
:
def
_generate_and_upload_audio
(
self
,
text
:
str
,
student_id
:
str
)
->
dict
:
""" Segments text, generates TTS audio, and uploads to MinIO. """
""" Segments text, generates TTS audio, and uploads to MinIO. """
try
:
try
:
segments
=
self
.
segmentation_service
.
segment_text
(
text
)
audio_bytes
=
self
.
agent_service
.
tts_service
.
generate_speech
(
text
)
audio_bytes
=
self
.
agent_service
.
tts_service
.
generate_speech_from_sequence
(
segments
)
timestamp
=
int
(
time
.
time
())
timestamp
=
int
(
time
.
time
())
filename
=
f
"agent_response_{timestamp}_{student_id}.wav"
filename
=
f
"agent_response_{timestamp}_{student_id}.wav"
minio_file_path
=
f
"audio/{filename}"
minio_file_path
=
f
"audio/{filename}"
...
...
self_hosted_env/voice_agent/services/openai_service.py
View file @
3bc37f6b
...
@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService):
...
@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService):
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"Transcription failed: {str(e)}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"Transcription failed: {str(e)}"
)
# ------------------- TTS -------------------
# ------------------- TTS -------------------
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""Generate speech from text using OpenAI TTS. Returns raw audio bytes."""
"""Generate speech from text using OpenAI TTS. Returns raw audio bytes."""
if
not
self
.
is_available
():
if
not
self
.
is_available
():
raise
HTTPException
(
status_code
=
500
,
detail
=
"OpenAI service not available"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
"OpenAI service not available"
)
...
@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService):
...
@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService):
print
(
f
"Error during OpenAI TTS generation: {e}"
)
print
(
f
"Error during OpenAI TTS generation: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"OpenAI TTS generation failed: {str(e)}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"OpenAI TTS generation failed: {str(e)}"
)
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
"""
Fallback implementation for OpenAI. It combines the text from all
segments and makes a single TTS call.
"""
print
(
"OpenAI provider: combining segments for a single TTS call."
)
full_text
=
" "
.
join
([
segment
[
'text'
]
for
segment
in
segments
])
# Just call the existing simple method
return
self
.
generate_speech
(
full_text
)
# ------------------- Embeddings -------------------
# ------------------- Embeddings -------------------
...
...
self_hosted_env/voice_agent/services/tts/base_tts_service.py
View file @
3bc37f6b
...
@@ -13,7 +13,7 @@ class BaseTTSService(ABC):
...
@@ -13,7 +13,7 @@ class BaseTTSService(ABC):
pass
pass
@
abstractmethod
@
abstractmethod
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""
"""
Generate speech from text.
Generate speech from text.
...
@@ -26,10 +26,3 @@ class BaseTTSService(ABC):
...
@@ -26,10 +26,3 @@ class BaseTTSService(ABC):
"""
"""
pass
pass
@
abstractmethod
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
"""
Generates a single audio file from a list of language-tagged text segments.
This is for handling mixed-language sentences.
"""
pass
\ No newline at end of file
self_hosted_env/voice_agent/services/tts/custom_tts_service.py
View file @
3bc37f6b
import
os
import
os
import
httpx
import
httpx
from
typing
import
List
,
Dict
from
typing
import
List
,
Dict
from
.base_tts_service
import
BaseTTSService
from
.base_tts_service
import
BaseTTSService
class
CustomTTSService
(
BaseTTSService
):
class
CustomTTSService
(
BaseTTSService
):
...
@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService):
...
@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService):
TTS Service implementation that calls our self-hosted, custom FastAPI model.
TTS Service implementation that calls our self-hosted, custom FastAPI model.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
# Read the URL of our FastAPI server from an environment variable
base_url
=
os
.
getenv
(
"CUSTOM_TTS_URL"
,
"http://localhost:5000"
)
base_url
=
os
.
getenv
(
"CUSTOM_TTS_URL"
,
"http://localhost:5000"
)
self
.
api_url
=
f
"{base_url}/synthesize"
self
.
sequence_api_url
=
f
"{base_url}/synthesize_sequence"
self
.
api_url
=
f
"{base_url}/generate_audio"
self
.
_is_available
=
bool
(
base_url
)
self
.
_is_available
=
bool
(
base_url
)
print
(
f
"Custom TTS Service initialized.
Base URL: {base
_url}"
)
print
(
f
"Custom TTS Service initialized.
Target Endpoint: {self.api
_url}"
)
def
is_available
(
self
)
->
bool
:
def
is_available
(
self
)
->
bool
:
return
self
.
_is_available
return
self
.
_is_available
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""
"""
Makes an HTTP POST request to the custom TTS FastAPI server.
Makes an HTTP POST request to the custom TTS FastAPI server.
Expected API Payload: {"text": "some text"}
Returns: Binary audio data (WAV)
"""
"""
if
not
self
.
is_available
():
payload
=
{
raise
ConnectionError
(
"Custom TTS service is not configured or available."
)
"text"
:
text
}
try
:
try
:
# Use httpx for modern, async-friendly requests
with
httpx
.
Client
(
timeout
=
60.0
)
as
client
:
with
httpx
.
Client
()
as
client
:
response
=
client
.
post
(
self
.
api_url
,
json
=
payload
)
response
=
client
.
post
(
self
.
api_url
,
json
=
{
"text"
:
text
,
"language"
:
language
},
timeout
=
120.0
# Set a generous timeout for long text
)
#
Raise an exception for bad status codes (4xx or 5xx
)
#
Check if the request was successful (2xx status codes
)
response
.
raise_for_status
()
response
.
raise_for_status
()
# The raw audio data is in the response content
# Return the binary content (the WAV file)
audio_bytes
=
response
.
content
return
response
.
content
print
(
f
"Successfully received audio from custom TTS service for language '{language}'."
)
return
audio_bytes
except
httpx
.
HTTPStatusError
as
exc
:
print
(
f
"Error response {exc.response.status_code} while requesting {exc.request.url!r}."
)
except
httpx
.
RequestError
as
e
:
raise
exc
print
(
f
"Error calling custom TTS service: {e}"
)
except
httpx
.
RequestError
as
exc
:
# Re-raise as a standard ConnectionError
print
(
f
"An error occurred while requesting {exc.request.url!r}: {exc}"
)
raise
ConnectionError
(
f
"Failed to connect to custom TTS service at {self.api_url}"
)
from
e
raise
exc
except
Exception
as
e
:
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
print
(
f
"Unexpected error in CustomTTSService: {e}"
)
"""Makes a single POST request with the list of segments."""
raise
e
if
not
self
.
is_available
():
\ No newline at end of file
raise
ConnectionError
(
"Custom TTS service is not configured."
)
try
:
with
httpx
.
Client
()
as
client
:
print
(
f
"Sending sequence of {len(segments)} segments to custom TTS service."
)
response
=
client
.
post
(
self
.
sequence_api_url
,
json
=
{
"segments"
:
segments
},
# Send the list directly
timeout
=
300.0
# Longer timeout for sequence processing
)
response
.
raise_for_status
()
audio_bytes
=
response
.
content
print
(
"Successfully received stitched audio from custom TTS service."
)
return
audio_bytes
except
httpx
.
RequestError
as
e
:
raise
ConnectionError
(
f
"Failed to connect to custom TTS at {self.sequence_api_url}"
)
from
e
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again or attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment