import logging
import os
from typing import List, Dict, Optional
from fastapi import HTTPException
import sys
import json
import random
import math
import re
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from core import StudentNationality, Models
from services.pgvector_service import PGVectorService
from services.openai_service import OpenAIService
from services.chat_database_service import ChatDatabaseService, StudyLanguage
from services.pedagogy_service import PedagogyService
from services.connection_pool import ConnectionPool
from services.agent_helpers.query_handlers import QueryHandler
from services.agent_helpers.context_generator import ContextGenerator
from services.agent_helpers.response_generator import ResponseGenerator
from services.agent_helpers.tashkeel_agent import TashkeelAgent
from services.agent_helpers.tashkeel_fixer import apply_fixes, custom_fixes
from services.tts.tts_manager import get_tts_service

# Module-level logger named after this module; used by every AgentService method below.
logger = logging.getLogger(__name__)

class AgentService:
    """Main service class for handling AI agent conversations with modular architecture"""

    def __init__(self, use_pgvector: bool = True, pool_handler: Optional[ConnectionPool] = None):
        """Wire up the LLM, TTS, database, vector-search and helper components.

        Args:
            use_pgvector: When true, a PGVectorService is created on the
                connection pool and its curriculum-table setup is run;
                otherwise ``self.pgvector`` is ``None``.
            pool_handler: Existing connection pool to reuse. When ``None``, a
                pool is built from the DB_NAME, DB_USER, DB_PASSWORD, DB_HOST
                and DB_PORT environment variables.
        """
        # The LLM client comes first: the TTS factory and the helpers below receive it.
        self.openai_service = OpenAIService()
        if not self.openai_service.is_available():
            logger.warning("Warning: OPENAI_API_KEY not found. Agent service will be disabled.")

        self.tts_service = get_tts_service(self.openai_service)
        if not self.tts_service.is_available():
            logger.warning("Warning: No TTS service is available.")

        # Reuse the caller's pool when given; otherwise build one from the environment.
        self.pool_handler = pool_handler
        if self.pool_handler is None:
            db_settings = {
                "dbname": os.getenv("DB_NAME"),
                "user": os.getenv("DB_USER"),
                "password": os.getenv("DB_PASSWORD"),
                "host": os.getenv("DB_HOST"),
                "port": os.getenv("DB_PORT"),
            }
            self.pool_handler = ConnectionPool(minconn=1, maxconn=20, **db_settings)

        self.db_service = ChatDatabaseService(self.pool_handler)

        # Vector search is optional; when enabled its table setup runs right away.
        self.use_pgvector = use_pgvector
        self.pgvector = PGVectorService(self.pool_handler) if use_pgvector else None
        if self.pgvector is not None:
            self.pgvector.setup_curriculum_table()

        self.pedagogy_service = PedagogyService()

        # Helper components; the response generator also receives this service itself.
        self.query_handler = QueryHandler(self.openai_service, self.pgvector, self.db_service)
        self.context_generator = ContextGenerator(self.openai_service, self.pgvector)
        self.response_generator = ResponseGenerator(
            self.openai_service,
            self.db_service,
            self.pedagogy_service,
            self.query_handler,
            self.context_generator,
            self,
        )

        self.tashkeel_agent = TashkeelAgent(self.openai_service)


    def is_available(self) -> bool:
        return self.openai_service.is_available()



    def generate_response(self, user_message: str, student_id: str, subject: str = "Science",
                         model: str = Models.chat, temperature: float = 0.3, top_k: int = 3):
        """
        Produce the agent's reply to a student message.

        The work is delegated to the response generator, which returns either
        a plain string (text answer) or a dictionary (structured answer such
        as an MCQ).

        Args:
            user_message: The student's message.
            student_id: Identifier of the student.
            subject: Subject name, forwarded to the response generator.
            model: Chat model name, forwarded to the response generator.
            temperature: Sampling temperature, forwarded to the response generator.
            top_k: Forwarded unchanged to the response generator.

        Returns:
            The dictionary untouched when the generator returns one; otherwise
            the text after the rule-based ``apply_fixes`` pass.
        """
        response = self.response_generator.generate_response(
            user_message, student_id, subject, model, temperature, top_k
        )

        # Structured responses must not go through text post-processing.
        if isinstance(response, dict):
            logger.info("AgentService received a structured response (MCQ). Bypassing text processing.")
            return response

        # Plain text from here on, so the text-based fixes are safe to apply.
        response = apply_fixes(response, custom_fixes)
        # NOTE: the LLM-based pass (self.tashkeel_agent.apply_tashkeel) is
        # currently disabled; only the rule-based fixes above are applied.

        # Use the logger instead of print() so the output respects logging config.
        logger.debug(f"response: {response}")
        return response


    def close(self):
        """Close database connection pools"""
        if self.pool_handler:
            try:
                self.pool_handler.close_all()
            except Exception as e:
                logger.error(f"Error closing connection pools: {e}")

    

    def _extract_grade_integer(self, grade_str: str) -> int:
        """
        Safely extracts the first integer from a grade string like '4th Grade'.
        This acts as a translator between the string-based MCQ schema and the
        integer-based vector DB schema.
        """
        if not isinstance(grade_str, str):
             raise ValueError(f"Grade must be a string, but got {type(grade_str)}")
        
        numbers = re.findall(r'\d+', grade_str)
        if numbers:
            return int(numbers[0])
        
        # If no numbers are found, we cannot query the vector DB. This is an invalid input.
        raise ValueError(f"Could not extract a numeric grade from the input string: '{grade_str}'")
    



    def generate_and_store_mcqs(
        self, curriculum: str, grade: str, subject: str, unit: str, concept: str,
        is_arabic: bool, num_questions: int = 5
    ) -> List[Dict]:
        """
        Generate new MCQs for a concept with the LLM and store them.

        The prompt lists the questions already stored for the topic so the
        model can avoid repeating them, and asks the model to assign a
        difficulty level (1-10) to each question it writes.

        Args:
            curriculum: Curriculum name stored with each question.
            grade: Grade label that must contain a number (e.g. '4th Grade');
                the number is used to filter the curriculum context search.
            subject: Subject name.
            unit: Unit name.
            concept: Concept the questions are about.
            is_arabic: Selects the Arabic or the English prompt and filters
                both the existing questions and the context search.
            num_questions: How many questions the model is asked to write.

        Returns:
            The list of MCQ dictionaries that was passed to ``insert_mcqs``.

        Raises:
            HTTPException: 503 when the vector service is disabled, 400 when
                `grade` contains no number, 404 when no curriculum context is
                found, 500 when the model output cannot be parsed into at
                least one question.
        """
        if not self.pgvector:
            raise HTTPException(status_code=503, detail="Vector service is not available for context retrieval.")

        # Validate the grade before any database or LLM work so bad input fails fast.
        try:
            grade_for_search = self._extract_grade_integer(grade)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e

        # --- STEP 1: collect the questions that already exist for this topic ---
        logger.info(f"Checking for existing questions for: {curriculum}/{grade}/{subject}/{unit}/{concept}")
        existing_questions = self.pgvector.get_mcqs(
            curriculum, grade, subject, unit, concept, is_arabic, limit=None
        )
        existing_questions_text = "No existing questions found."
        if existing_questions:
            existing_questions_text = "\n".join(f"- {q['question_text']}" for q in existing_questions)

        # --- STEP 2: retrieve curriculum context to ground the new questions ---
        search_query = f"summary of {concept} in {unit} for {subject}"
        query_embedding = self.openai_service.generate_embedding(search_query)
        context_chunks = self.pgvector.search_filtered_nearest(
            query_embedding, grade_for_search, subject, is_arabic, limit=10
        )
        if not context_chunks:
            raise HTTPException(status_code=404, detail="No curriculum context found for this topic.")
        full_context = "\n---\n".join(chunk['chunk_text'] for chunk in context_chunks)

        # --- STEP 3: build the prompt; the model assigns the difficulty levels itself ---
        if is_arabic:
            prompt = f"""
    أنت خبير في تطوير المناهج التعليمية، ومهمتك هي إنشاء **أسئلة اختيار من متعدد جديدة بالكامل** (أسئلة لم تظهر من قبل).

    هذه هي الأسئلة الموجودة بالفعل لمفهوم "{concept}":
    --- الأسئلة الموجودة ---
    {existing_questions_text}
    --- نهاية الأسئلة الموجودة ---

    استنادًا فقط إلى المعلومات التالية:
    --- السياق ---
    {full_context}
    --- نهاية السياق ---

    قم بإنشاء {num_questions} سؤالًا جديدًا تمامًا من نوع الاختيار من متعدد (MCQ)، **مختلفة كليًا عن الأسئلة الموجودة أعلاه**.

    ⚠️ **مهم جدًا**:
    يجب أن تتضمن الأسئلة مستويات صعوبة متنوعة وفق التوزيع التالي تقريبًا:
    - ٤٠٪ أسئلة سهلة (مستوى صعوبة من ١ إلى ٤)
    - ٣٠٪ أسئلة متوسطة (مستوى صعوبة من ٥ إلى ٧)
    - ٣٠٪ أسئلة صعبة (مستوى صعوبة من ٨ إلى ١٠)

    **صيغة الإخراج مطلوبة أن تكون مصفوفة JSON صالحة** (JSON array) من الكائنات (objects).  
    كل كائن يجب أن يحتوي على المفاتيح التالية **بالضبط**:

    - "question_text": نص السؤال.
    - "difficulty_level": رقم صحيح من ١ إلى ١٠ يمثل مستوى الصعوبة.
    - "question_type": نوع السؤال (مثلاً: "multiple_choice").
    - "correct_answer": الإجابة الصحيحة الوحيدة.
    - "wrong_answer_1": إجابة خاطئة ولكن معقولة.
    - "wrong_answer_2": إجابة خاطئة ولكن معقولة.
    - "wrong_answer_3": إجابة خاطئة ولكن معقولة.
    - "wrong_answer_4": إجابة خاطئة ولكن معقولة.
    - "hint": تلميح أو مساعدة للطالب لفهم السؤال.
    - "question_image_url": اترك هذا الحقل كسلسلة فارغة "".
    - "correct_image_url": اترك هذا الحقل كسلسلة فارغة "".
    - "wrong_image_url_1": اترك هذا الحقل كسلسلة فارغة "".
    - "wrong_image_url_2": اترك هذا الحقل كسلسلة فارغة "".
    - "wrong_image_url_3": اترك هذا الحقل كسلسلة فارغة "".
    - "wrong_image_url_4": اترك هذا الحقل كسلسلة فارغة "".

    لا تكتب أي نص خارج مصفوفة JSON.
    """
        else:
            prompt = f"""
            You are an expert curriculum developer creating new multiple-choice questions.

            Here are the questions that ALREADY EXIST for the concept "{concept}":
            --- EXISTING QUESTIONS ---
            {existing_questions_text}
            --- END EXISTING QUESTIONS ---

            Based ONLY on the following context:
            --- CONTEXT ---
            {full_context}
            --- END CONTEXT ---
            
            Generate {num_questions} NEW and COMPLETELY DIFFERENT multiple-choice questions from the list above.

            **IMPORTANT**: For the {num_questions} questions you generate, assign a difficulty level to each one. The distribution should be approximately:
            - 40% easy (difficulty 1-4)
            - 30% medium (difficulty 5-7)
            - 30% hard (difficulty 8-10)
            
            Your response MUST be a valid JSON array of objects. Each object must have these exact keys:
            - "question_text": The text of the question.
            - "difficulty_level": An integer between 1 and 10, based on the distribution rule.
            - "question_type": The type of question (e.g., "multiple_choice").
            - "correct_answer": The single correct answer.
            - "wrong_answer_1": A plausible wrong answer.
            - "wrong_answer_2": Another plausible wrong answer.
            - "wrong_answer_3": A third plausible wrong answer.
            - "wrong_answer_4": A fourth plausible wrong answer.
            - "hint": A helpful hint for the student.
            - "question_image_url": Leave this as an empty string "".
            - "correct_image_url": Leave this as an empty string "".
            - "wrong_image_url_1": Leave this as an empty string "".
            - "wrong_image_url_2": Leave this as an empty string "".
            - "wrong_image_url_3": Leave this as an empty string "".
            - "wrong_image_url_4": Leave this as an empty string "".
            
            Do not include any text outside of the JSON array.
            """

        # --- STEP 4: call the LLM and parse its answer ---
        # Pre-bind so the error handler can always log it, even when the
        # failure happens before the content has been read.
        response_content = None
        try:
            response = self.openai_service.client.chat.completions.create(
                model=Models.chat, messages=[{"role": "user", "content": prompt}],
                temperature=0.7, response_format={"type": "json_object"}
            )
            response_content = response.choices[0].message.content
            json_response = json.loads(response_content)

            # The prompt asks for an array while the request asks for a JSON
            # object, so accept either a bare list or an object wrapping one.
            if isinstance(json_response, list):
                candidates = json_response
            elif isinstance(json_response, dict):
                candidates = next((v for v in json_response.values() if isinstance(v, list)), None)
            else:
                candidates = None

            # Keep only well-formed entries; a non-dict item would crash the
            # storage step and an entry without question text is unusable.
            generated_questions = [
                q for q in (candidates or [])
                if isinstance(q, dict) and q.get("question_text")
            ]
            if candidates and len(generated_questions) < len(candidates):
                logger.warning(
                    f"Dropped {len(candidates) - len(generated_questions)} malformed MCQ entries from the LLM response."
                )
            if not generated_questions:
                raise ValueError("LLM did not return a list of questions in the JSON response.")
        except (json.JSONDecodeError, ValueError, KeyError, TypeError, IndexError) as e:
            logger.error(f"Failed to parse MCQ response from LLM: {e}\nRaw Response: {response_content}")
            raise HTTPException(status_code=500, detail="Failed to generate or parse MCQs from AI.") from e

        # --- STEP 5: combine topic keys with the AI-provided fields for storage ---
        mcqs_to_store = []
        for q in generated_questions:
            mcqs_to_store.append({
                "curriculum": curriculum, "grade": grade, "subject": subject, "unit": unit,
                "concept": concept, "is_arabic": is_arabic,
                "difficulty_level": q.get("difficulty_level"),  # assigned by the model
                "question_text": q.get("question_text"),
                "question_type": q.get("question_type", "multiple_choice"),
                "correct_answer": q.get("correct_answer"),
                "wrong_answer_1": q.get("wrong_answer_1"),
                "wrong_answer_2": q.get("wrong_answer_2"),
                "wrong_answer_3": q.get("wrong_answer_3"),
                "wrong_answer_4": q.get("wrong_answer_4"),
                "hint": q.get("hint"),
                "question_image_url": q.get("question_image_url"),
                "correct_image_url": q.get("correct_image_url"),
                "wrong_image_url_1": q.get("wrong_image_url_1"),
                "wrong_image_url_2": q.get("wrong_image_url_2"),
                "wrong_image_url_3": q.get("wrong_image_url_3"),
                "wrong_image_url_4": q.get("wrong_image_url_4"),
            })

        self.pgvector.insert_mcqs(mcqs_to_store)
        return mcqs_to_store

    

    def handle_ask_for_question(self, student_id: str) -> Dict:
        """
        Pick one MCQ to ask the student about the topic they are discussing.

        Steps: look up the student, ask the LLM to name the unit/concept from
        the recent chat history, try to generate one fresh MCQ for that topic
        (best effort), load the stored MCQs for the topic, drop those whose
        text appears in recent assistant messages, let the LLM shortlist the
        most relevant ones, and pick randomly from the shortlist. The chosen
        question text is appended to the chat history as an assistant message.

        Args:
            student_id: Identifier of the student asking for a question.

        Returns:
            The chosen MCQ dictionary as returned by the vector service.

        Raises:
            HTTPException: 503 when the vector service is disabled, 404 when
                the student or any question for the topic cannot be found,
                400 when there is no chat history, 500 when the topic cannot
                be determined.
        """
        logger.info(f"Handling 'ask_for_question' request for student {student_id}.")

        # Same guard as the other MCQ entry points; without it a disabled
        # vector service surfaces later as an AttributeError.
        if not self.pgvector:
            raise HTTPException(status_code=503, detail="Vector service is not available for this feature.")

        # 1. Student info and current topic.
        student_info = self.db_service.get_student_info(student_id)
        if not student_info:
            raise HTTPException(status_code=404, detail="Student not found.")
        grade, is_arabic, subject = student_info['grade'], student_info['is_arabic'], "Science"
        # NOTE(review): always uses the "th" suffix (e.g. "1th Grade"); presumably this
        # matches how grades are stored with the MCQs - confirm before changing.
        grade_str = f"{grade}th Grade"
        nationality = student_info['nationality']
        if nationality == StudentNationality.EGYPTIAN:
            curriculum = "EGYPTIAN National"
        else:
            curriculum = "SAUDI National"

        recent_history = self.db_service.get_chat_history(student_id, limit=6)
        if not recent_history:
            raise HTTPException(status_code=400, detail="Cannot ask a question without conversation context.")

        history_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in recent_history])
        topic_prompt = f"""
        Based on the recent conversation below, identify the specific Unit and Concept the student is currently discussing.
        Your response MUST be a valid JSON object with the keys "unit" and "concept".
        Conversation:\n{history_text}
        """
        try:
            response = self.openai_service.client.chat.completions.create(
                model=Models.classification, messages=[{"role": "user", "content": topic_prompt}],
                temperature=0, response_format={"type": "json_object"}
            )
            topic_data = json.loads(response.choices[0].message.content)
            unit, concept = topic_data['unit'], topic_data['concept']
            logger.info(f"Determined current topic for question: Unit='{unit}', Concept='{concept}'")
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            # TypeError covers empty (None) content and a non-object JSON payload.
            logger.error(f"Could not determine topic for student {student_id}: {e}")
            raise HTTPException(status_code=500, detail="Could not determine the current topic.") from e

        # 2. Best effort: add one new question to the pool; a failure here is not fatal.
        try:
            self.generate_and_store_mcqs(curriculum, grade_str, subject, unit, concept, is_arabic, num_questions=1)
        except Exception as e:
            logger.warning(f"Non-critical error: Failed to generate a new background MCQ: {e}")

        # 3. Load the pool and drop questions that were asked recently.
        all_mcqs = self.pgvector.get_mcqs(curriculum, grade_str, subject, unit, concept, is_arabic, limit=None)
        if not all_mcqs:
            raise HTTPException(status_code=404, detail="No questions found for the current topic.")

        asked_question_texts = {msg['content'] for msg in recent_history if msg['role'] == 'assistant'}
        unasked_mcqs = [mcq for mcq in all_mcqs if mcq['question_text'] not in asked_question_texts]

        if not unasked_mcqs:
            logger.warning(f"All questions for '{concept}' have been asked recently. Re-using full list.")
            unasked_mcqs = all_mcqs

        # Text -> MCQ lookup; the first MCQ wins when two share the same text.
        mcq_by_text = {}
        for mcq in unasked_mcqs:
            mcq_by_text.setdefault(mcq['question_text'], mcq)
        available_texts = [mcq['question_text'] for mcq in unasked_mcqs]

        # 4. Shortlist with the LLM.
        # Use the most recent message written by the student; fall back to the
        # last message of any role when none carries the 'user' role.
        # NOTE(review): assumes the history is ordered oldest-to-newest and that
        # student messages use role 'user' - confirm against get_chat_history.
        last_user_message = next(
            (msg['content'] for msg in reversed(recent_history) if msg['role'] == 'user'),
            recent_history[-1]['content'],
        )

        relevant_question_texts = []
        # Ask for up to 3 questions, but never more than are available.
        num_to_select = min(3, len(unasked_mcqs))

        if num_to_select == 1:
            # Nothing to choose between, so skip the LLM call.
            relevant_question_texts = [unasked_mcqs[0]['question_text']]
            logger.info("Only one un-asked question available, selecting it directly.")
        elif num_to_select > 1:
            available_texts_json = json.dumps(available_texts, ensure_ascii=False, indent=2)
            selection_prompt = f"""
            A student just said: "{last_user_message}"
            Here is a list of available questions about the topic '{concept}':
            {available_texts_json}

            From the list above, select the {num_to_select} questions that are MOST RELEVANT to what the student just said.

            Your response MUST be a valid JSON object with a single key "relevant_questions" which is an array of the chosen question strings.
            Example: {{"relevant_questions": ["Question text 1", "Question text 2"]}}
            """

            try:
                response = self.openai_service.client.chat.completions.create(
                    model=Models.classification,
                    messages=[{"role": "user", "content": selection_prompt}],
                    temperature=0.1,
                    response_format={"type": "json_object"}
                )
                response_data = json.loads(response.choices[0].message.content)
                selected = response_data.get("relevant_questions", [])
                # Keep only texts that match an available question exactly, so a
                # paraphrased or invented entry cannot void the whole shortlist.
                if isinstance(selected, list):
                    relevant_question_texts = [
                        text for text in selected
                        if isinstance(text, str) and text in mcq_by_text
                    ]
                logger.info(f"LLM identified {len(relevant_question_texts)} relevant questions.")
            except Exception as e:
                logger.warning(f"LLM failed to select a relevant subset of questions: {e}. Will select from all available questions.")

        # Fallback: no usable shortlist means every un-asked question is a candidate.
        if not relevant_question_texts:
            relevant_question_texts = available_texts

        # 5. Random pick from the shortlist; every candidate text is a key of mcq_by_text.
        chosen_mcq = mcq_by_text[random.choice(relevant_question_texts)]

        logger.info(f"Selected question for student {student_id}: '{chosen_mcq['question_text']}'")

        # Record the question in the history so it is not asked again right away.
        self.db_service.add_message(student_id, 'assistant', chosen_mcq['question_text'])

        return chosen_mcq
    

    def get_dynamic_quiz(
        self, curriculum: str, grade: str, subject: str, unit: str, concept: str, is_arabic: bool, count: int
        ) -> List[Dict]:
        """
        Generates a dynamic quiz of 'count' questions using a hybrid approach with BATCHED generation.
        """
        if not self.pgvector:
            raise HTTPException(status_code=503, detail="Vector service is not available for this feature.")
        
        MAX_QUESTIONS_PER_BATCH = 10
        num_fresh_questions = min(max(1, math.floor(count / 3)), 5)
        logger.info(f"Request for {count} questions. Step 1: Generating {num_fresh_questions} new 'freshness' questions.")
        
        try:
            # --- FIX #1: Removed the erroneous 'difficulty_level' argument ---
            self.generate_and_store_mcqs(
                curriculum=curriculum, grade=grade, subject=subject, unit=unit, concept=concept,
                is_arabic=is_arabic, num_questions=num_fresh_questions
            )
        except Exception as e:
            logger.warning(f"Could not generate 'freshness' questions for the quiz due to an error: {e}")

        all_mcqs_after_freshness = self.pgvector.get_mcqs(
            curriculum=curriculum,
            grade=grade, subject=subject, unit=unit, concept=concept,
            is_arabic=is_arabic, limit=None
        )
        
        questions_still_needed = count - len(all_mcqs_after_freshness)

        if questions_still_needed > 0:
            logger.info(f"After freshness batch, have {len(all_mcqs_after_freshness)} questions. Need to generate {questions_still_needed} more to meet count of {count}.")
            
            remaining = questions_still_needed
            while remaining > 0:
                batch_size = min(remaining, MAX_QUESTIONS_PER_BATCH)
                try:
                    logger.info(f"Generating batch of {remaining // MAX_QUESTIONS_PER_BATCH + 1} of {batch_size} questions...")
                    
                    # --- FIX #2: Added the missing 'curriculum' argument ---
                    self.generate_and_store_mcqs(
                        curriculum=curriculum,
                        grade=grade, 
                        subject=subject, 
                        unit=unit, 
                        concept=concept,
                        is_arabic=is_arabic, 
                        num_questions=batch_size
                    )
                    remaining -= batch_size
                    
                except Exception as e:
                    logger.error(f"Failed to generate batch of {batch_size} questions: {e}")
                    # Break the loop if generation fails to prevent an infinite loop
                    break

        final_pool = self.pgvector.get_mcqs(
            curriculum=curriculum,
            grade=grade, subject=subject, unit=unit, concept=concept,
            is_arabic=is_arabic, limit=None
        )

        if not final_pool:
            raise HTTPException(status_code=404, detail="No questions could be found or generated for this topic.")
        
        if len(final_pool) < count:
            logger.warning(f"Could only gather {len(final_pool)} questions out of {count} requested. Returning all available questions.")
            
        random.shuffle(final_pool)
        final_quiz = final_pool[:min(count, len(final_pool))]
        
        logger.info(f"Returning a dynamic quiz of {len(final_quiz)} questions for '{concept}'.")
        return final_quiz