Commit 3bc37f6b authored by salma

use the new RVC pipeline

parent 946afbe7
...@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = { ...@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Egyptian + Arabic ---------- # ---------- Egyptian + Arabic ----------
(StudentNationality.EGYPTIAN, StudyLanguage.ARABIC): """ (StudentNationality.EGYPTIAN, StudyLanguage.ARABIC): """
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")، فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
رُد بالنصّ الثابت ده: رُد بالنصّ الثابت ده:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab، "أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = { ...@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Saudi + Arabic ---------- # ---------- Saudi + Arabic ----------
(StudentNationality.SAUDI, StudyLanguage.ARABIC): """ (StudentNationality.SAUDI, StudyLanguage.ARABIC): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")، فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنص الثابت هذا: رُد بالنص الثابت هذا:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab، "أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
...@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = { ...@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Egyptian English -------- # -------- Egyptian English --------
(StudentNationality.EGYPTIAN, StudyLanguage.ENGLISH): """ (StudentNationality.EGYPTIAN, StudyLanguage.ENGLISH): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")، إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
رُد بالنصّ الثابت ده: رُد بالنصّ الثابت ده:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab، "أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم." وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
...@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = { ...@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Saudi English -------- # -------- Saudi English --------
(StudentNationality.SAUDI, StudyLanguage.ENGLISH): """ (StudentNationality.SAUDI, StudyLanguage.ENGLISH): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")، لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنصّ الثابت هذا: رُد بالنصّ الثابت هذا:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab، "أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
......
...@@ -83,7 +83,7 @@ class AgentService: ...@@ -83,7 +83,7 @@ class AgentService:
# If we reach here, it means the response is a normal text string. # If we reach here, it means the response is a normal text string.
# Now it is safe to apply text-based fixes. # Now it is safe to apply text-based fixes.
response = apply_fixes(response, custom_fixes) #response = apply_fixes(response, custom_fixes)
# response = self.tashkeel_agent.apply_tashkeel(response) # response = self.tashkeel_agent.apply_tashkeel(response)
print(f"response: {response}") print(f"response: {response}")
......
...@@ -133,8 +133,7 @@ class ChatService: ...@@ -133,8 +133,7 @@ class ChatService:
def _generate_and_upload_audio(self, text: str, student_id: str) -> dict: def _generate_and_upload_audio(self, text: str, student_id: str) -> dict:
""" Segments text, generates TTS audio, and uploads to MinIO. """ """ Segments text, generates TTS audio, and uploads to MinIO. """
try: try:
segments = self.segmentation_service.segment_text(text) audio_bytes = self.agent_service.tts_service.generate_speech(text)
audio_bytes = self.agent_service.tts_service.generate_speech_from_sequence(segments)
timestamp = int(time.time()) timestamp = int(time.time())
filename = f"agent_response_{timestamp}_{student_id}.wav" filename = f"agent_response_{timestamp}_{student_id}.wav"
minio_file_path = f"audio/{filename}" minio_file_path = f"audio/{filename}"
......
...@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService): ...@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService):
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}") raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
# ------------------- TTS ------------------- # ------------------- TTS -------------------
def generate_speech(self, text: str, language: str = "en") -> bytes: def generate_speech(self, text: str) -> bytes:
"""Generate speech from text using OpenAI TTS. Returns raw audio bytes.""" """Generate speech from text using OpenAI TTS. Returns raw audio bytes."""
if not self.is_available(): if not self.is_available():
raise HTTPException(status_code=500, detail="OpenAI service not available") raise HTTPException(status_code=500, detail="OpenAI service not available")
...@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService): ...@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService):
print(f"Error during OpenAI TTS generation: {e}") print(f"Error during OpenAI TTS generation: {e}")
raise HTTPException(status_code=500, detail=f"OpenAI TTS generation failed: {str(e)}") raise HTTPException(status_code=500, detail=f"OpenAI TTS generation failed: {str(e)}")
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes:
"""
Fallback implementation for OpenAI. It combines the text from all
segments and makes a single TTS call.
"""
print("OpenAI provider: combining segments for a single TTS call.")
full_text = " ".join([segment['text'] for segment in segments])
# Just call the existing simple method
return self.generate_speech(full_text)
# ------------------- Embeddings ------------------- # ------------------- Embeddings -------------------
......
...@@ -13,7 +13,7 @@ class BaseTTSService(ABC): ...@@ -13,7 +13,7 @@ class BaseTTSService(ABC):
pass pass
@abstractmethod @abstractmethod
def generate_speech(self, text: str, language: str = "en") -> bytes: def generate_speech(self, text: str) -> bytes:
""" """
Generate speech from text. Generate speech from text.
...@@ -26,10 +26,3 @@ class BaseTTSService(ABC): ...@@ -26,10 +26,3 @@ class BaseTTSService(ABC):
""" """
pass pass
@abstractmethod
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes:
"""
Generates a single audio file from a list of language-tagged text segments.
This is for handling mixed-language sentences.
"""
pass
\ No newline at end of file
import os import os
import httpx import httpx
from typing import List, Dict from typing import List, Dict
from .base_tts_service import BaseTTSService from .base_tts_service import BaseTTSService
class CustomTTSService(BaseTTSService): class CustomTTSService(BaseTTSService):
...@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService): ...@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService):
TTS Service implementation that calls our self-hosted, custom FastAPI model. TTS Service implementation that calls our self-hosted, custom FastAPI model.
""" """
def __init__(self): def __init__(self):
# Read the URL of our FastAPI server from an environment variable
base_url = os.getenv("CUSTOM_TTS_URL", "http://localhost:5000") base_url = os.getenv("CUSTOM_TTS_URL", "http://localhost:5000")
self.api_url = f"{base_url}/synthesize"
self.sequence_api_url = f"{base_url}/synthesize_sequence" self.api_url = f"{base_url}/generate_audio"
self._is_available = bool(base_url) self._is_available = bool(base_url)
print(f"Custom TTS Service initialized. Base URL: {base_url}") print(f"Custom TTS Service initialized. Target Endpoint: {self.api_url}")
def is_available(self) -> bool: def is_available(self) -> bool:
return self._is_available return self._is_available
def generate_speech(self, text: str, language: str = "en") -> bytes: def generate_speech(self, text: str) -> bytes:
""" """
Makes an HTTP POST request to the custom TTS FastAPI server. Makes an HTTP POST request to the custom TTS FastAPI server.
Expected API Payload: {"text": "some text"}
Returns: Binary audio data (WAV)
""" """
if not self.is_available(): payload = {
raise ConnectionError("Custom TTS service is not configured or available.") "text": text
}
try: try:
# Use httpx for modern, async-friendly requests with httpx.Client(timeout=60.0) as client:
with httpx.Client() as client: response = client.post(self.api_url, json=payload)
response = client.post(
self.api_url,
json={"text": text, "language": language},
timeout=120.0 # Set a generous timeout for long text
)
# Raise an exception for bad status codes (4xx or 5xx) # Check if the request was successful (2xx status codes)
response.raise_for_status() response.raise_for_status()
# The raw audio data is in the response content # Return the binary content (the WAV file)
audio_bytes = response.content return response.content
print(f"Successfully received audio from custom TTS service for language '{language}'.")
return audio_bytes except httpx.HTTPStatusError as exc:
print(f"Error response {exc.response.status_code} while requesting {exc.request.url!r}.")
except httpx.RequestError as e: raise exc
print(f"Error calling custom TTS service: {e}") except httpx.RequestError as exc:
# Re-raise as a standard ConnectionError print(f"An error occurred while requesting {exc.request.url!r}: {exc}")
raise ConnectionError(f"Failed to connect to custom TTS service at {self.api_url}") from e raise exc
except Exception as e:
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes: print(f"Unexpected error in CustomTTSService: {e}")
"""Makes a single POST request with the list of segments.""" raise e
if not self.is_available(): \ No newline at end of file
raise ConnectionError("Custom TTS service is not configured.")
try:
with httpx.Client() as client:
print(f"Sending sequence of {len(segments)} segments to custom TTS service.")
response = client.post(
self.sequence_api_url,
json={"segments": segments}, # Send the list directly
timeout=300.0 # Longer timeout for sequence processing
)
response.raise_for_status()
audio_bytes = response.content
print("Successfully received stitched audio from custom TTS service.")
return audio_bytes
except httpx.RequestError as e:
raise ConnectionError(f"Failed to connect to custom TTS at {self.sequence_api_url}") from e
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment