Commit 39ae3ad9 authored by salma

Parallel Speculative Execution

parent aa857505
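This commit replaces the sequential classify-then-search pipeline with speculative execution: the query classification (a slow LLM call) and the vector search (fast, but only needed for content questions) are submitted to a thread pool together, and the search result is simply discarded if the classifier rules it out. A minimal standalone sketch of the pattern; `classify`, `vector_search`, and the timings are hypothetical stand-ins for the real services:

```python
from concurrent.futures import ThreadPoolExecutor
import time

def classify(query: str) -> str:
    # Stand-in for the slow LLM-based query classification (~1s).
    time.sleep(1.0)
    return "specific_content" if "?" in query else "general_chat"

def vector_search(query: str) -> list:
    # Stand-in for the embedding + index lookup (~0.2s).
    time.sleep(0.2)
    return [f"chunk relevant to {query!r}"]

def respond(query: str) -> list:
    with ThreadPoolExecutor(max_workers=2) as pool:
        future_type = pool.submit(classify, query)       # Task A: the brain
        future_hits = pool.submit(vector_search, query)  # Task B: the eyes, speculative
        query_type = future_type.result()
    # Sequential cost: ~1.2s. Speculative cost: ~1.0s, because the search
    # runs entirely in the shadow of the classification.
    if query_type == "specific_content":
        return future_hits.result()  # already finished, so this returns instantly
    return []  # speculation wasted ~0.2s of compute; nothing else lost

print(respond("Why is the sky blue?"))
```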
@@ -2,6 +2,8 @@ import os
 import sys
 from typing import Dict
 from fastapi import HTTPException
+import concurrent.futures
+from concurrent.futures import ThreadPoolExecutor
 from services.agent_helpers.agent_prompts import SYSTEM_PROMPTS
 import logging
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
@@ -73,116 +75,153 @@ class ResponseGenerator:
         temperature: float = 0.3,
         top_k: int = 3
     ) -> str:
-        """Enhanced AI response generation with JSON-based curriculum structure awareness"""
+        """
+        Enhanced response generation with Parallel Speculative Execution.
+        Classification and Vector Search run simultaneously.
+        """
         if not self.openai_service.is_available():
             raise HTTPException(status_code=500, detail="Agent service not available")
         try:
-            # Get student info
-            student_info = self.db_service.get_student_info(student_id)
+            # 1. PREPARE (fast DB lookups, run in parallel)
+            # Fetch student info and history first, since both feed the next stage.
+            with ThreadPoolExecutor(max_workers=2) as pre_executor:
+                future_student = pre_executor.submit(self.db_service.get_student_info, student_id)
+                # Fetch history via the optimized history helper
+                future_history = pre_executor.submit(self.get_conversation_history, student_id)
+                student_info = future_student.result()
+                conversation_history = future_history.result()
             if not student_info:
                 raise HTTPException(status_code=404, detail=f"Student with ID {student_id} not found")
-            student_name = student_info.student_name.split()[0]
-            study_language = student_info.study_language
-            # Add user message to DB
+            # Save the user message. This call is synchronous; it could be made
+            # fire-and-forget to shave more latency if the DB write is trusted.
             self.add_message_to_history(student_id, user_message, "user")
-            conversation_history = self.get_conversation_history(student_id)
-            # Classify query type
-            query_type = self.query_handler.classify_query_type(
-                user_message,
-                student_info,
-                student_id,
-                conversation_history
-            )
-            logger.info(f"Query type: {query_type} for student {student_name} ({study_language.value}) with conversation context")
-            # Prepare system prompt
-            formatted_base_prompt = self.prepare_system_prompt(student_info)
-            # Build base messages
-            messages = [{"role": "system", "content": formatted_base_prompt}]
-            messages.extend(conversation_history)
+            # 2. SPECULATIVE PARALLEL EXECUTION
+            query_type = "specific_content"  # Default fallback
+            relevant_results = []  # Default empty
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                # TASK A: THE BRAIN (classification)
+                # Pass the conversation_history we already fetched to save a DB call.
+                future_classification = executor.submit(
+                    self.query_handler.classify_query_type,
+                    user_message,
+                    student_info,
+                    student_id,
+                    conversation_history
+                )
+                # TASK B: THE EYES (vector search)
+                # We "speculate" that the user MIGHT be asking a content question and
+                # run the search immediately. If it turns out to be "general_chat",
+                # we threw away ~200ms of compute; for real questions we saved ~1.5s
+                # of latency.
+                future_search = executor.submit(
+                    self.context_generator.search_enhanced_content,
+                    user_message,
+                    student_info,
+                    subject,
+                    top_k
+                )
+                # TASK C (optional): a rule-based check for game help is fast and
+                # could run here or inside the classification logic.
+                # Wait for the classification to finish.
+                query_type = future_classification.result()
+                logger.info(f"Query classified as: {query_type}")
+            # 3. SYNCHRONIZATION & DECISION
+            system_context_content = ""
-            if query_type == "general_chat":
-                chat_context = self.query_handler.handle_general_chat_query(user_message, student_info)
-                messages.append({"role": "system", "content": f"سياق المحادثة العامة:\n{chat_context}"})
-            elif query_type == "overview":
-                overview_response = self.query_handler.handle_overview_query(student_info, subject)
-                messages.append({"role": "system", "content": f"المنهج الكامل من ملف JSON:\n{overview_response}"})
-            elif query_type == "navigation":
-                navigation_response = self.query_handler.handle_navigation_query(user_message, student_info, subject)
-                messages.append({"role": "system", "content": f"تفاصيل الوحدة/المفهوم من JSON:\n{navigation_response}"})
-            elif query_type == "specific_content":
-                # Enhanced content search
-                relevant_results = self.context_generator.search_enhanced_content(
-                    user_message, student_info, subject, top_k
-                )
+            if query_type == "specific_content":
+                # The brain says: "This is a content question!"
+                # Check the eyes (search results): they are ALREADY READY.
+                relevant_results = future_search.result()
                 if relevant_results:
-                    enhanced_context = self.context_generator.generate_enhanced_context(
+                    system_context_content = self.context_generator.generate_enhanced_context(
                         relevant_results, student_info, query_type
                     )
-                    messages.append({"role": "system", "content": enhanced_context})
-                    logger.info(f"Added enhanced context with {len(relevant_results)} chunks for student {student_name}")
+                    logger.info(f"Using speculative search results: {len(relevant_results)} chunks")
             elif query_type == "game_help":
+                # Handle game help
                 game_context, user_query = self.query_handler.handle_game_help_query(user_message)
-                logger.info(f"Handling game_help query. Context: {game_context}")
-                # Start building a single, comprehensive context string
-                system_context = f"سياق اللعبة التعليمية اللي هتساعد الطفل فيها:\n{game_context}"
-                # Search for and add curriculum context if it exists
-                relevant_results = self.context_generator.search_enhanced_content(
-                    user_query, student_info, subject, top_k
-                )
-                if relevant_results:
-                    enhanced_context = self.context_generator.generate_enhanced_context(
-                        relevant_results, student_info, query_type
-                    )
-                    # Append the curriculum context to the same string
-                    system_context += f"\n\nمحتوي المنهج اللي ليه علاقة بسؤال الطفل:\n{enhanced_context}"
-                    logger.info(f"Added enhanced context with {len(relevant_results)} chunks for game help.")
-                # Now, add only ONE system message with all the context
-                messages.append({"role": "system", "content": system_context})
+                system_context_content = f"سياق اللعبة التعليمية اللي هتساعد الطفل فيها:\n{game_context}"
+                # For game help we also use the search results started in Task B
+                # (the game query might need curriculum knowledge).
+                relevant_results = future_search.result()
+                if relevant_results:
+                    enhanced_ctx = self.context_generator.generate_enhanced_context(
+                        relevant_results, student_info, query_type
+                    )
+                    system_context_content += f"\n\nمحتوى المنهج اللي ليه علاقة بسؤال الطفل:\n{enhanced_ctx}"
+            elif query_type == "general_chat":
+                # The brain says: "Just chatting."
+                # We IGNORE the search results from Task B. (Task B has already
+                # finished by now, since leaving the executor context waits for it;
+                # we simply never read its data.)
+                chat_context = self.query_handler.handle_general_chat_query(user_message, student_info)
+                system_context_content = f"سياق المحادثة العامة:\n{chat_context}"
+            elif query_type == "overview":
+                overview_response = self.query_handler.handle_overview_query(student_info, subject)
+                system_context_content = f"المنهج الكامل من ملف JSON:\n{overview_response}"
+            elif query_type == "navigation":
+                nav_response = self.query_handler.handle_navigation_query(user_message, student_info, subject)
+                system_context_content = f"تفاصيل الوحدة/المفهوم من JSON:\n{nav_response}"
-            if query_type == "ask_for_question":
-                mcq_data = self.agent_service.handle_ask_for_question(student_id)
-                return {
-                    "type": "mcq",
-                    "data": mcq_data
-                }
+            elif query_type == "ask_for_question":
+                # Special case: returns a dict immediately
+                return {
+                    "type": "mcq",
+                    "data": self.agent_service.handle_ask_for_question(student_id)
+                }
-            # Finally add user message
-            messages.append({"role": "user", "content": user_message})
-            # ==========================
-            # CALL AI MODEL
-            # ==========================
+            # 4. FINAL GENERATION
+            # Prepare system prompt
+            formatted_base_prompt = self.prepare_system_prompt(student_info)
+            # Build messages
+            messages = [{"role": "system", "content": formatted_base_prompt}]
+            messages.extend(conversation_history)
+            # Add the context we decided on (if any)
+            if system_context_content:
+                messages.append({"role": "system", "content": system_context_content})
+            # Add user message
+            messages.append({"role": "user", "content": user_message})
+            # Call AI
             response = self.openai_service.client.chat.completions.create(
                 model=model,
                 messages=messages,
                 temperature=temperature
             )
             ai_response = response.choices[0].message.content.strip()
             if not ai_response:
                 raise ValueError("Empty response from AI model")
             # Save AI response
             self.add_message_to_history(student_id, ai_response, "assistant")
-            logger.info(f"Generated {query_type} response for {student_name} ({study_language.value}) with conversation context: {len(ai_response)} characters")
             return ai_response
         except HTTPException:
             raise
         except Exception as e:
             logger.error(f"Error generating response for student {student_id}: {e}")
             raise HTTPException(status_code=500, detail="Error generating response")
\ No newline at end of file
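One detail of `concurrent.futures` worth keeping in mind when reading the branch comments above: leaving a `with ThreadPoolExecutor(...)` block calls `shutdown(wait=True)`, so by the time the decision step runs, the speculative Task B has always completed, whether or not its result is used. A small demonstration (the lambdas and timings are illustrative only):

```python
from concurrent.futures import ThreadPoolExecutor
import time

start = time.monotonic()
with ThreadPoolExecutor(max_workers=2) as pool:
    fast = pool.submit(lambda: "general_chat")               # finishes immediately
    slow = pool.submit(lambda: time.sleep(2.0) or "chunks")  # speculative search
    print(fast.result(), f"ready at {time.monotonic() - start:.1f}s")
# __exit__ ran shutdown(wait=True): execution only reaches this line
# once `slow` is also done, even if its result is never read.
print(slow.done(), f"exited at {time.monotonic() - start:.1f}s")  # True, ~2.0s
```

If the wasted search ever becomes expensive, owning a long-lived executor instead of the context manager would let general_chat responses return without waiting for the speculative task to finish.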