Commit 3bc37f6b authored by salma

use the new RVC pipeline

parent 946afbe7
......@@ -9,6 +9,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Egyptian + Arabic ----------
(StudentNationality.EGYPTIAN, StudyLanguage.ARABIC): """
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
......@@ -79,6 +80,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# ---------- Saudi + Arabic ----------
(StudentNationality.SAUDI, StudyLanguage.ARABIC): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
فقط لو الطفل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنص الثابت هذا:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
......@@ -149,7 +151,9 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Egyptian English --------
(StudentNationality.EGYPTIAN, StudyLanguage.ENGLISH): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أَساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
......@@ -218,6 +222,7 @@ SYSTEM_PROMPTS: Dict[Tuple[StudentNationality, StudyLanguage], str] = {
# -------- Saudi English --------
(StudentNationality.SAUDI, StudyLanguage.ENGLISH): """
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
اتكلم باللهجة المصرية.
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنصّ الثابت هذا:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
......
......@@ -83,7 +83,7 @@ class AgentService:
# If we reach here, it means the response is a normal text string.
# Now it is safe to apply text-based fixes.
response = apply_fixes(response, custom_fixes)
#response = apply_fixes(response, custom_fixes)
# response = self.tashkeel_agent.apply_tashkeel(response)
print(f"response: {response}")
......
......@@ -133,8 +133,7 @@ class ChatService:
def _generate_and_upload_audio(self, text: str, student_id: str) -> dict:
""" Segments text, generates TTS audio, and uploads to MinIO. """
try:
segments = self.segmentation_service.segment_text(text)
audio_bytes = self.agent_service.tts_service.generate_speech_from_sequence(segments)
audio_bytes = self.agent_service.tts_service.generate_speech(text)
timestamp = int(time.time())
filename = f"agent_response_{timestamp}_{student_id}.wav"
minio_file_path = f"audio/{filename}"
......
......@@ -55,7 +55,7 @@ class OpenAIService(BaseTTSService):
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
# ------------------- TTS -------------------
def generate_speech(self, text: str, language: str = "en") -> bytes:
def generate_speech(self, text: str) -> bytes:
"""Generate speech from text using OpenAI TTS. Returns raw audio bytes."""
if not self.is_available():
raise HTTPException(status_code=500, detail="OpenAI service not available")
......@@ -80,16 +80,7 @@ class OpenAIService(BaseTTSService):
print(f"Error during OpenAI TTS generation: {e}")
raise HTTPException(status_code=500, detail=f"OpenAI TTS generation failed: {str(e)}")
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes:
    """Synthesize a list of segments with a single OpenAI TTS request.

    Only the ``text`` entry of each segment is read. The texts are joined
    with single spaces, in order, and the combined string is passed to
    ``generate_speech``; whatever that call returns is returned unchanged.
    """
    print("OpenAI provider: combining segments for a single TTS call.")
    pieces = []
    for item in segments:
        pieces.append(item['text'])
    combined = " ".join(pieces)
    # Delegate to the plain single-text method.
    return self.generate_speech(combined)
# ------------------- Embeddings -------------------
......
......@@ -13,7 +13,7 @@ class BaseTTSService(ABC):
pass
@abstractmethod
def generate_speech(self, text: str, language: str = "en") -> bytes:
def generate_speech(self, text: str) -> bytes:
"""
Generate speech from text.
......@@ -26,10 +26,3 @@ class BaseTTSService(ABC):
"""
pass
@abstractmethod
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes:
    """Produce one audio payload from language-tagged text segments.

    Exists so that sentences mixing several languages can be rendered as a
    single audio file. This declaration carries no implementation.
    """
    ...
\ No newline at end of file
import os
import httpx
from typing import List, Dict
from .base_tts_service import BaseTTSService
class CustomTTSService(BaseTTSService):
......@@ -9,61 +8,42 @@ class CustomTTSService(BaseTTSService):
TTS Service implementation that calls our self-hosted, custom FastAPI model.
"""
def __init__(self):
# Sets up the client: resolves the server base URL, derives the endpoint
# URL(s) from it, records availability, and prints what was configured.
# NOTE(review): this span looks like a diff rendering in which pre-change and
# post-change lines are interleaved without +/- markers -- confirm against
# the repository which of the duplicated statements are live.
#
# Read the base URL of the FastAPI server from CUSTOM_TTS_URL; when the
# variable is unset the default http://localhost:5000 is used.
base_url = os.getenv("CUSTOM_TTS_URL", "http://localhost:5000")
# api_url is assigned twice in this span; if both statements execute, the
# later "/generate_audio" value is the one that remains.
self.api_url = f"{base_url}/synthesize"
self.sequence_api_url = f"{base_url}/synthesize_sequence"
self.api_url = f"{base_url}/generate_audio"
# Availability is only "base_url is a non-empty string"; no request is sent
# to the server here. Given the default above, this is False only when
# CUSTOM_TTS_URL is explicitly set to an empty value.
self._is_available = bool(base_url)
print(f"Custom TTS Service initialized. Base URL: {base_url}")
print(f"Custom TTS Service initialized. Target Endpoint: {self.api_url}")
def is_available(self) -> bool:
    """Return the availability flag stored on this instance."""
    configured = self._is_available
    return configured
# NOTE(review): two signatures appear back to back below -- one accepting
# `language` (default "en"), one accepting only `text`. This looks like the
# before/after lines of a diff shown without +/- markers; the same applies to
# the duplicated request code and exception handlers further down. Confirm
# against the repository which lines are live.
def generate_speech(self, text: str, language: str = "en") -> bytes:
def generate_speech(self, text: str) -> bytes:
"""
Makes an HTTP POST request to the custom TTS FastAPI server.
Expected API Payload: {"text": "some text"}
Returns: Binary audio data (WAV)
"""
# Guard: fail with ConnectionError before any HTTP traffic when the service
# reports itself as unavailable.
if not self.is_available():
raise ConnectionError("Custom TTS service is not configured or available.")
# JSON body carrying only the text to synthesize.
payload = {
"text": text
}
try:
# Request variant 1: posts both text and language, with a 120 s timeout
# passed on the individual request.
with httpx.Client() as client:
response = client.post(
self.api_url,
json={"text": text, "language": language},
timeout=120.0 # Set a generous timeout for long text
)
# Request variant 2: posts `payload` (text only), with a 60 s timeout set on
# the client itself.
with httpx.Client(timeout=60.0) as client:
response = client.post(self.api_url, json=payload)
# Raise an exception for bad status codes (4xx or 5xx)
# Check if the request was successful (2xx status codes)
response.raise_for_status()
# The raw audio data is in the response content
audio_bytes = response.content
# NOTE(review): `language` is only a parameter of the first signature above;
# under the text-only signature this name would be undefined.
print(f"Successfully received audio from custom TTS service for language '{language}'.")
return audio_bytes
# This handler converts httpx.RequestError into ConnectionError and chains
# the original exception as the cause.
except httpx.RequestError as e:
print(f"Error calling custom TTS service: {e}")
# Re-raise as a standard ConnectionError
raise ConnectionError(f"Failed to connect to custom TTS service at {self.api_url}") from e
# A separate method: posts {"segments": segments} to self.sequence_api_url
# with a 300 s timeout and returns the response body; it raises
# ConnectionError when unavailable or when httpx.RequestError occurs.
def generate_speech_from_sequence(self, segments: List[Dict[str, str]]) -> bytes:
"""Makes a single POST request with the list of segments."""
if not self.is_available():
raise ConnectionError("Custom TTS service is not configured.")
try:
with httpx.Client() as client:
print(f"Sending sequence of {len(segments)} segments to custom TTS service.")
response = client.post(
self.sequence_api_url,
json={"segments": segments}, # Send the list directly
timeout=300.0 # Longer timeout for sequence processing
)
response.raise_for_status()
audio_bytes = response.content
print("Successfully received stitched audio from custom TTS service.")
return audio_bytes
except httpx.RequestError as e:
raise ConnectionError(f"Failed to connect to custom TTS at {self.sequence_api_url}") from e
\ No newline at end of file
# Return the binary content (the WAV file)
return response.content
# The three handlers below each log and then re-raise the same exception
# object they caught.
# NOTE(review): unlike the handler earlier in this span that wraps
# httpx.RequestError in ConnectionError, these let httpx exceptions propagate
# as-is -- verify that callers do not depend on catching ConnectionError.
except httpx.HTTPStatusError as exc:
print(f"Error response {exc.response.status_code} while requesting {exc.request.url!r}.")
raise exc
except httpx.RequestError as exc:
print(f"An error occurred while requesting {exc.request.url!r}: {exc}")
raise exc
except Exception as e:
print(f"Unexpected error in CustomTTSService: {e}")
raise e
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment