Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
3bc37f6b
Commit
3bc37f6b
authored
Nov 25, 2025
by
salma
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use the new RVC pipeline
parent
946afbe7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
37 additions
and
69 deletions
+37
-69
agent_prompts.py
...d_env/voice_agent/services/agent_helpers/agent_prompts.py
+6
-1
agent_service.py
self_hosted_env/voice_agent/services/agent_service.py
+1
-1
chat_service.py
self_hosted_env/voice_agent/services/chat_service.py
+1
-2
openai_service.py
self_hosted_env/voice_agent/services/openai_service.py
+2
-11
base_tts_service.py
self_hosted_env/voice_agent/services/tts/base_tts_service.py
+1
-8
custom_tts_service.py
...hosted_env/voice_agent/services/tts/custom_tts_service.py
+26
-46
No files found.
self_hosted_env/voice_agent/services/agent_helpers/agent_prompts.py
View file @
3bc37f6b
...
...
@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Egyptian + Arabic ----------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ARABIC
):
"""
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
...
@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Saudi + Arabic ----------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ARABIC
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنص الثابت هذا:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
...
@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Egyptian English --------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
...
...
@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Saudi English --------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنصّ الثابت هذا:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...
...
self_hosted_env/voice_agent/services/agent_service.py
View file @
3bc37f6b
...
...
@@ -83,7 +83,7 @@ class AgentService:
# If we reach here, it means the response is a normal text string.
# Now it is safe to apply text-based fixes.
response
=
apply_fixes
(
response
,
custom_fixes
)
# response = apply_fixes(response, custom_fixes)
# response = self.tashkeel_agent.apply_tashkeel(response)
print
(
f
"response: {response}"
)
...
...
self_hosted_env/voice_agent/services/chat_service.py
View file @
3bc37f6b
...
...
@@ -133,8 +133,7 @@ class ChatService:
def
_generate_and_upload_audio
(
self
,
text
:
str
,
student_id
:
str
)
->
dict
:
""" Segments text, generates TTS audio, and uploads to MinIO. """
try
:
segments
=
self
.
segmentation_service
.
segment_text
(
text
)
audio_bytes
=
self
.
agent_service
.
tts_service
.
generate_speech_from_sequence
(
segments
)
audio_bytes
=
self
.
agent_service
.
tts_service
.
generate_speech
(
text
)
timestamp
=
int
(
time
.
time
())
filename
=
f
"agent_response_{timestamp}_{student_id}.wav"
minio_file_path
=
f
"audio/{filename}"
...
...
self_hosted_env/voice_agent/services/openai_service.py
View file @
3bc37f6b
...
...
@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService):
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"Transcription failed: {str(e)}"
)
# ------------------- TTS -------------------
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""Generate speech from text using OpenAI TTS. Returns raw audio bytes."""
if
not
self
.
is_available
():
raise
HTTPException
(
status_code
=
500
,
detail
=
"OpenAI service not available"
)
...
...
@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService):
print
(
f
"Error during OpenAI TTS generation: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"OpenAI TTS generation failed: {str(e)}"
)
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
"""
Fallback implementation for OpenAI. It combines the text from all
segments and makes a single TTS call.
"""
print
(
"OpenAI provider: combining segments for a single TTS call."
)
full_text
=
" "
.
join
([
segment
[
'text'
]
for
segment
in
segments
])
# Just call the existing simple method
return
self
.
generate_speech
(
full_text
)
# ------------------- Embeddings -------------------
...
...
self_hosted_env/voice_agent/services/tts/base_tts_service.py
View file @
3bc37f6b
...
...
@@ -13,7 +13,7 @@ class BaseTTSService(ABC):
pass
@
abstractmethod
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""
Generate speech from text.
...
...
@@ -26,10 +26,3 @@ class BaseTTSService(ABC):
"""
pass
@
abstractmethod
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
"""
Generates a single audio file from a list of language-tagged text segments.
This is for handling mixed-language sentences.
"""
pass
\ No newline at end of file
self_hosted_env/voice_agent/services/tts/custom_tts_service.py
View file @
3bc37f6b
import
os
import
httpx
from
typing
import
List
,
Dict
from
.base_tts_service
import
BaseTTSService
class
CustomTTSService
(
BaseTTSService
):
...
...
@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService):
TTS Service implementation that calls our self-hosted, custom FastAPI model.
"""
def
__init__
(
self
):
# Read the URL of our FastAPI server from an environment variable
base_url
=
os
.
getenv
(
"CUSTOM_TTS_URL"
,
"http://localhost:5000"
)
self
.
api_url
=
f
"{base_url}/synthesize"
self
.
sequence_api_url
=
f
"{base_url}/synthesize_sequence"
self
.
api_url
=
f
"{base_url}/generate_audio"
self
.
_is_available
=
bool
(
base_url
)
print
(
f"Custom TTS Service initialized. Base URL: {base_url}"
)
print
(
f"Custom TTS Service initialized. Target Endpoint: {self.api_url}"
)
def
is_available
(
self
)
->
bool
:
return
self
.
_is_available
def
generate_speech
(
self
,
text
:
str
,
language
:
str
=
"en"
)
->
bytes
:
def
generate_speech
(
self
,
text
:
str
)
->
bytes
:
"""
Makes an HTTP POST request to the custom TTS FastAPI server.
Expected API Payload: {"text": "some text"}
Returns: Binary audio data (WAV)
"""
if
not
self
.
is_available
():
raise
ConnectionError
(
"Custom TTS service is not configured or available."
)
payload
=
{
"text"
:
text
}
try
:
# Use httpx for modern, async-friendly requests
with
httpx
.
Client
()
as
client
:
response
=
client
.
post
(
self
.
api_url
,
json
=
{
"text"
:
text
,
"language"
:
language
},
timeout
=
120.0
# Set a generous timeout for long text
)
with
httpx
.
Client
(
timeout
=
60.0
)
as
client
:
response
=
client
.
post
(
self
.
api_url
,
json
=
payload
)
# Raise an exception for bad status codes (4xx or 5xx)
# Check if the request was successful (2xx status codes)
response
.
raise_for_status
()
# The raw audio data is in the response content
audio_bytes
=
response
.
content
print
(
f
"Successfully received audio from custom TTS service for language '{language}'."
)
return
audio_bytes
except
httpx
.
RequestError
as
e
:
print
(
f
"Error calling custom TTS service: {e}"
)
# Re-raise as a standard ConnectionError
raise
ConnectionError
(
f
"Failed to connect to custom TTS service at {self.api_url}"
)
from
e
def
generate_speech_from_sequence
(
self
,
segments
:
List
[
Dict
[
str
,
str
]])
->
bytes
:
"""Makes a single POST request with the list of segments."""
if
not
self
.
is_available
():
raise
ConnectionError
(
"Custom TTS service is not configured."
)
try
:
with
httpx
.
Client
()
as
client
:
print
(
f
"Sending sequence of {len(segments)} segments to custom TTS service."
)
response
=
client
.
post
(
self
.
sequence_api_url
,
json
=
{
"segments"
:
segments
},
# Send the list directly
timeout
=
300.0
# Longer timeout for sequence processing
)
response
.
raise_for_status
()
audio_bytes
=
response
.
content
print
(
"Successfully received stitched audio from custom TTS service."
)
return
audio_bytes
except
httpx
.
RequestError
as
e
:
raise
ConnectionError
(
f
"Failed to connect to custom TTS at {self.sequence_api_url}"
)
from
e
\ No newline at end of file
# Return the binary content (the WAV file)
return
response
.
content
except
httpx
.
HTTPStatusError
as
exc
:
print
(
f
"Error response {exc.response.status_code} while requesting {exc.request.url!r}."
)
raise
exc
except
httpx
.
RequestError
as
exc
:
print
(
f
"An error occurred while requesting {exc.request.url!r}: {exc}"
)
raise
exc
except
Exception
as
e
:
print
(
f
"Unexpected error in CustomTTSService: {e}"
)
raise
e
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment