mcq AI builder

parent 8d42c50c
import os
import shutil
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, BackgroundTasks
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, BackgroundTasks, logger
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
......@@ -12,6 +12,7 @@ from pathlib import Path
import tempfile
import json
import pandas as pd
import logging
from curriculum_structure import convert_json_to_db_format
from process_pdf_pipline import run_full_pipeline
......@@ -96,6 +97,7 @@ async def lifespan(app: FastAPI):
def create_app() -> FastAPI:
# Connect the lifespan manager to your FastAPI app instance
app = FastAPI(title="Unified Chat API with Local Agent", lifespan=lifespan)
logger = logging.getLogger("uvicorn.error")
# Fixed CORS configuration for CapRover
app.add_middleware(
......@@ -336,6 +338,74 @@ def create_app() -> FastAPI:
return {"status": "processing_started", "message": "The curriculum is being processed in the background."}
@app.post("/mcq/generate")
async def generate_mcqs_handler(
    request: Request,
    grade: int = Form(...),
    subject: str = Form(...),
    unit: str = Form(...),
    concept: str = Form(...),
    count: int = Form(5),
    is_arabic: bool = Form(False)
):
    """
    Generate and store a new set of MCQs for a specific topic.

    Form fields:
        grade, subject, unit, concept: identify the curriculum topic.
        count: number of questions to request (default 5, must be >= 1).
        is_arabic: language flag passed through to the agent service.

    Returns:
        A dict with "status", a human-readable "message" and the list of
        stored "questions".

    Raises:
        HTTPException: 422 when ``count`` is below 1; any HTTPException
            raised by the agent service is passed through unchanged; every
            other failure is reported as a 500 carrying the error text.
    """
    # Reject nonsensical requests before any model call is made.
    if count < 1:
        raise HTTPException(status_code=422, detail="count must be at least 1.")

    container = request.app.state.container
    try:
        # NOTE(review): this is a synchronous call inside an async handler,
        # so it occupies the event loop while it runs — consider offloading
        # it to a worker thread.
        generated_questions = container.agent_service.generate_and_store_mcqs(
            grade=grade,
            subject=subject,
            unit=unit,
            concept=concept,
            num_questions=count,
            is_arabic=is_arabic
        )
        return {
            "status": "success",
            "message": f"Successfully generated and stored {len(generated_questions)} MCQs.",
            "questions": generated_questions
        }
    except HTTPException:
        # Keep the status code chosen by the service layer; a bare raise
        # preserves the original traceback.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("Error in generate_mcqs_handler: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/mcq")
async def get_mcqs_handler(
    request: Request,
    grade: int,
    subject: str,
    unit: str,
    concept: str,
    is_arabic: bool,
    # Optional: when omitted it stays None and no LIMIT is applied.
    limit: Optional[int] = None
):
    """
    Retrieve stored MCQs for a specific topic and language from the database.

    Query parameters:
        grade, subject, unit, concept, is_arabic: identify the topic/language.
        limit: maximum number of questions; omit to retrieve all of them.

    Returns:
        A dict with "status", the "count" of questions and the "questions".

    Raises:
        HTTPException: 422 for a negative ``limit``; 503 when the vector/DB
            service is not available; 500 for any other failure.
    """
    # A negative LIMIT can never be satisfied, so report it as a client error.
    if limit is not None and limit < 0:
        raise HTTPException(status_code=422, detail="limit must be a non-negative integer.")

    container = request.app.state.container
    try:
        pgvector = container.agent_service.pgvector
        if not pgvector:
            # Same status the agent service uses when pgvector is missing,
            # instead of an AttributeError surfacing as a generic 500.
            raise HTTPException(status_code=503, detail="Vector service is not available.")
        questions = pgvector.get_mcqs(
            grade=grade,
            subject=subject,
            unit=unit,
            concept=concept,
            is_arabic=is_arabic,
            limit=limit  # May be None, meaning "no limit"
        )
        return {
            "status": "success",
            "count": len(questions),
            "questions": questions
        }
    except HTTPException:
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("Error in get_mcqs_handler: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.options("/get-audio-response")
async def audio_response_options():
"""Handle preflight CORS requests for audio response endpoint"""
......
......@@ -3,6 +3,7 @@ import os
from typing import List, Dict, Optional
from fastapi import HTTPException
import sys
import json
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from core import StudentNationality, Models
......@@ -94,3 +95,132 @@ class AgentService:
except Exception as e:
logger.error(f"Error closing connection pools: {e}")
def generate_and_store_mcqs(
    self, grade: int, subject: str, unit: str, concept: str, is_arabic: bool, num_questions: int = 5
) -> List[Dict]:
    """
    Generate new MCQs for a topic, store them, and return what was stored.

    Existing questions for the topic are fetched first and listed in the
    prompt so the model is instructed not to repeat them. Curriculum context
    is retrieved through an embedding search and included in the prompt.

    Args:
        grade, subject, unit, concept: identify the curriculum topic.
        is_arabic: selects the Arabic or the English prompt and is passed to
            the lookup and search calls.
        num_questions: how many questions the prompt asks for.

    Returns:
        The list of MCQ dicts that were passed to ``insert_mcqs``.

    Raises:
        HTTPException: 503 when ``self.pgvector`` is unavailable, 404 when
            no curriculum context is found, 500 when the model response
            cannot be parsed or contains no well-formed question.
    """
    if not self.pgvector:
        raise HTTPException(status_code=503, detail="Vector service is not available for context retrieval.")

    # === STEP 1: RETRIEVE EXISTING QUESTIONS ===
    logger.info(f"Checking for existing questions for: {grade}/{subject}/{unit}/{concept}")
    existing_questions = self.pgvector.get_mcqs(
        grade, subject, unit, concept, is_arabic, limit=None  # Fetch ALL existing questions
    )
    existing_questions_text = "No existing questions found."
    if existing_questions:
        # One bullet per existing question, for inclusion in the prompt.
        existing_questions_text = "\n".join(f"- {q['question_text']}" for q in existing_questions)
        logger.info(f"Found {len(existing_questions)} existing questions. Will instruct AI to generate different ones.")

    # === STEP 2: RETRIEVE CURRICULUM CONTEXT ===
    # NOTE(review): unit/concept only shape the query text here; the search
    # call itself receives grade, subject and language.
    search_query = f"summary of {concept} in {unit}"
    query_embedding = self.openai_service.generate_embedding(search_query)
    context_chunks = self.pgvector.search_filtered_nearest(
        query_embedding, grade, subject, is_arabic, limit=10
    )
    if not context_chunks:
        raise HTTPException(status_code=404, detail="No curriculum context found for this topic in the specified language.")
    full_context = "\n---\n".join([chunk['chunk_text'] for chunk in context_chunks])

    # === STEP 3: BUILD THE PROMPT (prompt text is runtime data; kept verbatim) ===
    if is_arabic:
        prompt = f"""
أنت خبير في تطوير المناهج ومهمتك إنشاء أسئلة اختيار من متعدد جديدة ومختلفة.
هذه هي الأسئلة الموجودة حاليًا في قاعدة البيانات حول المفهوم "{concept}":
--- الأسئلة الحالية ---
{existing_questions_text}
--- نهاية الأسئلة الحالية ---
اعتمادًا فقط على السياق التالي من المنهج:
--- السياق ---
{full_context}
--- نهاية السياق ---
يرجى توليد {num_questions} من أسئلة الاختيار من متعدد **الجديدة والمختلفة تمامًا** عن الأسئلة الموجودة أعلاه.
يجب أن تكون كل الأسئلة قابلة للإجابة مباشرة من السياق المقدم.
يجب أن يكون ردك مصفوفة JSON صحيحة. كل كائن يجب أن يحتوي على المفاتيح التالية:
- "question_text": نص السؤال.
- "correct_answer": الإجابة الصحيحة.
- "wrong_answer_1": إجابة خاطئة.
- "wrong_answer_2": إجابة خاطئة.
- "wrong_answer_3": إجابة خاطئة.
لا تكتب أي نص أو شرح خارج مصفوفة الـ JSON.
"""
    else:
        prompt = f"""
You are an expert curriculum developer tasked with creating new and unique multiple-choice questions.
Here are the questions that ALREADY EXIST in the database for the concept "{concept}":
--- EXISTING QUESTIONS ---
{existing_questions_text}
--- END EXISTING QUESTIONS ---
Based ONLY on the following context from the curriculum:
--- CONTEXT ---
{full_context}
--- END CONTEXT ---
Please generate {num_questions} NEW and COMPLETELY DIFFERENT multiple-choice questions from the list of existing ones above.
Each question must be answerable directly from the provided context. The questions and all answers MUST be in English.
Your response MUST be a valid JSON array of objects with these keys:
- "question_text"
- "correct_answer"
- "wrong_answer_1"
- "wrong_answer_2"
- "wrong_answer_3"
Do not include any text outside of the JSON array.
"""

    # === STEP 4: CALL THE LLM AND PARSE ITS RESPONSE ===
    # Bound before the try so the error log below can always reference it.
    response_content = None
    try:
        response = self.openai_service.client.chat.completions.create(
            model=Models.chat,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.5,  # Slightly higher temp for more creativity
            response_format={"type": "json_object"}
        )
        response_content = response.choices[0].message.content
        json_response = json.loads(response_content)

        # The prompt asks for an array while the response format requests an
        # object, so accept both: a bare list, or the first list-valued
        # entry of a wrapping object.
        if isinstance(json_response, list):
            generated_questions = json_response
        elif isinstance(json_response, dict):
            generated_questions = next(
                (value for value in json_response.values() if isinstance(value, list)), []
            )
        else:
            generated_questions = []

        if not generated_questions:
            raise ValueError("LLM did not return a list of questions in the JSON response.")
    except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e:
        # TypeError covers json.loads() being handed a None message content.
        logger.error(f"Failed to parse MCQ response from LLM: {e}\nRaw Response: {response_content}")
        raise HTTPException(status_code=500, detail="Failed to generate or parse MCQs from AI.") from e

    # === STEP 5: VALIDATE AND STORE ===
    required_keys = (
        "question_text", "correct_answer",
        "wrong_answer_1", "wrong_answer_2", "wrong_answer_3",
    )
    mcqs_to_store = []
    for q in generated_questions:
        # Skip malformed items instead of failing the batch with a KeyError.
        if not isinstance(q, dict) or any(key not in q for key in required_keys):
            logger.warning("Skipping malformed MCQ returned by LLM: %r", q)
            continue
        mcqs_to_store.append({
            "grade": grade, "is_arabic": is_arabic, "subject": subject,
            "unit": unit, "concept": concept, "question_text": q["question_text"],
            "correct_answer": q["correct_answer"], "wrong_answer_1": q["wrong_answer_1"],
            "wrong_answer_2": q["wrong_answer_2"], "wrong_answer_3": q["wrong_answer_3"],
        })

    if not mcqs_to_store:
        logger.error(f"LLM returned no well-formed MCQs.\nRaw Response: {response_content}")
        raise HTTPException(status_code=500, detail="Failed to generate or parse MCQs from AI.")

    self.pgvector.insert_mcqs(mcqs_to_store)
    return mcqs_to_store
\ No newline at end of file
......@@ -524,3 +524,59 @@ class PGVectorService:
except Exception as e:
print(f"❌ Database verification failed: {e}")
def insert_mcqs(self, mcq_list: List[Dict]):
    """
    Insert a batch of MCQs, including the language flag, in one transaction.

    Args:
        mcq_list: dicts that each provide grade, is_arabic, subject, unit,
            concept, question_text, correct_answer and wrong_answer_1..3.
            An empty or None list is a no-op.

    Raises:
        KeyError: if an item lacks one of the required keys (raised before
            any connection is taken from the pool).
        Exception: any database error is re-raised after the transaction
            has been rolled back.
    """
    if not mcq_list:
        return

    insert_query = """
INSERT INTO mcq_questions (
grade, is_arabic, subject, unit, concept, question_text,
correct_answer, wrong_answer_1, wrong_answer_2, wrong_answer_3
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
"""
    # Column order must match the INSERT column list above.
    columns = (
        'grade', 'is_arabic', 'subject', 'unit', 'concept',
        'question_text', 'correct_answer', 'wrong_answer_1',
        'wrong_answer_2', 'wrong_answer_3',
    )
    # Build the rows first so malformed input fails before touching the pool.
    data_to_insert = [tuple(q[column] for column in columns) for q in mcq_list]

    with self.pool_handler.get_connection() as conn:
        try:
            with conn.cursor() as cur:
                cur.executemany(insert_query, data_to_insert)
            conn.commit()
        except Exception:
            # Roll back so the connection is not handed back to the pool
            # with a failed transaction still open.
            conn.rollback()
            raise
    logger.info(f"Successfully inserted {len(mcq_list)} MCQs into the database.")
def get_mcqs(self, grade: int, subject: str, unit: str, concept: str, is_arabic: bool, limit: Optional[int] = 10) -> List[Dict]:
    """
    Retrieve MCQs for a specific topic and language, newest first.

    Args:
        grade, subject, unit, concept, is_arabic: exact-match filters.
        limit: maximum number of rows (default 10); None retrieves all
            matching questions.

    Returns:
        The rows from ``fetchall()`` on a ``RealDictCursor``, each holding
        id, question_text, correct_answer and wrong_answer_1..3.

    Raises:
        ValueError: if ``limit`` is negative.
    """
    # Fail fast instead of sending an invalid LIMIT to the database.
    if limit is not None and limit < 0:
        raise ValueError("limit must be a non-negative integer or None.")

    # Rows inserted in one transaction share the same created_at default,
    # so id is used as a tiebreaker to keep the ordering (and therefore any
    # LIMIT) deterministic.
    query = """
SELECT id, question_text, correct_answer, wrong_answer_1, wrong_answer_2, wrong_answer_3
FROM mcq_questions
WHERE grade = %s AND subject = %s AND unit = %s AND concept = %s AND is_arabic = %s
ORDER BY created_at DESC, id DESC
"""
    params = (grade, subject, unit, concept, is_arabic)
    if limit is not None:
        # The limit is passed as a bound parameter, never interpolated.
        query += " LIMIT %s;"
        params += (limit,)
    else:
        query += ";"

    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, params)
            return cur.fetchall()
\ No newline at end of file
import psycopg2
import os
from dotenv import load_dotenv
load_dotenv()
def setup_mcq_table(drop_existing_table: bool = False):
    """
    Set up the mcq_questions table and its topic index in the database.

    Connection settings are read from the POSTGRES_HOST, POSTGRES_PORT,
    POSTGRES_USER, POSTGRES_PASSWORD and POSTGRES_DB environment variables.

    Args:
        drop_existing_table: when True, the existing table is dropped
            (CASCADE) before being recreated, which destroys any stored
            questions. When False, the table and index are only created if
            they do not exist yet.

    Errors are reported on stdout rather than raised, so this function
    returns None whether or not the setup succeeded.
    """
    # Bound up front so the finally block can tell whether a connection was
    # ever opened, without inspecting locals().
    conn = None
    try:
        conn = psycopg2.connect(
            host=os.getenv("POSTGRES_HOST", "localhost"),
            port=os.getenv("POSTGRES_PORT", "5432"),
            user=os.getenv("POSTGRES_USER"),
            password=os.getenv("POSTGRES_PASSWORD"),
            dbname=os.getenv("POSTGRES_DB")
        )
        # With autocommit on, each DDL statement takes effect immediately.
        conn.autocommit = True
        with conn.cursor() as cur:
            if drop_existing_table:
                print("Dropping existing mcq_questions table...")
                cur.execute("DROP TABLE IF EXISTS mcq_questions CASCADE;")
                print("Table dropped.")
            print("Creating mcq_questions table...")
            # Table schema, including the is_arabic language flag.
            cur.execute("""
CREATE TABLE IF NOT EXISTS mcq_questions (
id SERIAL PRIMARY KEY,
grade INTEGER NOT NULL,
is_arabic BOOLEAN NOT NULL, -- <-- ADDED THIS LINE
subject TEXT NOT NULL,
unit TEXT NOT NULL,
concept TEXT NOT NULL,
question_text TEXT NOT NULL,
correct_answer TEXT NOT NULL,
wrong_answer_1 TEXT NOT NULL,
wrong_answer_2 TEXT NOT NULL,
wrong_answer_3 TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
""")
            print("Creating indexes on mcq_questions table...")
            # Composite index over the columns used to filter by topic.
            cur.execute("""
CREATE INDEX IF NOT EXISTS idx_mcq_topic
ON mcq_questions(grade, is_arabic, subject, unit, concept); -- <-- ADDED is_arabic
""")
            print("MCQ table setup complete.")
    except Exception as e:
        # Deliberately best-effort: report the problem and return.
        print(f"An error occurred during MCQ table setup: {e}")
    finally:
        if conn is not None:
            conn.close()
            print("Database connection closed.")
if __name__ == "__main__":
    # Script entry point. The call below passes drop_existing_table=False, so
    # an existing mcq_questions table and its data are kept and the table is
    # only created when missing (CREATE TABLE IF NOT EXISTS).
    # A schema change is therefore NOT applied to an existing table; to apply
    # one, call with drop_existing_table=True — this deletes all stored
    # questions, so be careful if you have data you want to keep.
    print("Creating MCQ table...")
    setup_mcq_table(drop_existing_table=False)
\ No newline at end of file
......@@ -10,6 +10,8 @@ python insert_csv_embeddings.py
echo "Database setup complete."
# Run the curriculum-structure setup script.
python curriculum_structure.py
echo "Curriculum structure setup complete."
# Run the MCQ table setup script.
# NOTE(review): setup_mcq_table() prints errors instead of raising, so the
# message below is echoed even when the table setup failed.
python setup_mcq_table.py
echo "MCQ table setup complete."
# Pause for 5 seconds before continuing — presumably to let the setup steps
# settle; TODO confirm whether this is still needed.
sleep 5
# Start the web server and keep it as the main process
......
"""
======================================================================
MCQ API Cookbook & Test Script
======================================================================
Purpose:
This script serves as both a live integration test and a practical guide ("cookbook")
for using the Multiple-Choice Question (MCQ) generation and retrieval API endpoints.
It demonstrates how to:
1. Generate and store new MCQs for a specific curriculum topic.
2. Retrieve existing MCQs from the database for that same topic.
----------------------------------------------------------------------
API Endpoints Guide
----------------------------------------------------------------------
There are two main endpoints for the MCQ feature:
1. Generate Questions (POST /mcq/generate)
------------------------------------------
This is the "creator" endpoint. It uses an AI model to generate a new set of questions
based on the curriculum content stored in the vector database. It then saves these
new questions to the `mcq_questions` table for future use.
- Method: POST
- URL: [BASE_URL]/mcq/generate
- Data Format: Must be sent as form data (`application/x-www-form-urlencoded`, or `multipart/form-data` as produced by `curl -F` in the example below).
Parameters (Form Data):
- grade (int, required): The grade level of the curriculum (e.g., 4).
- subject (str, required): The subject of the curriculum (e.g., "Science").
- unit (str, required): The exact name of the unit.
- concept (str, required): The exact name of the concept.
- is_arabic (bool, optional, default=false): Set to `true` for Arabic curriculum, `false` for English.
- count (int, optional, default=5): The number of new questions to generate.
Example Usage (using cURL):
curl -X POST [BASE_URL]/mcq/generate \
-F "grade=4" \
-F "subject=Science" \
-F "unit=الوحدة الأولى: الأنظمة الحية" \
-F "concept=المفهوم الأول: التكيف والبقاء" \
-F "is_arabic=true" \
-F "count=3"
2. Retrieve Questions (GET /mcq)
---------------------------------
This is the "reader" endpoint. It quickly and cheaply retrieves questions that have
already been generated and stored in the database. It does NOT call the AI model.
- Method: GET
- URL: [BASE_URL]/mcq
Parameters (URL Query Parameters):
- grade (int, required): The grade level.
- subject (str, required): The subject.
- unit (str, required): The unit name.
- concept (str, required): The concept name.
- is_arabic (bool, required): `true` for Arabic, `false` for English.
- limit (int, optional, default=None): The maximum number of questions to retrieve.
If omitted, it will retrieve ALL questions for that topic.
Example Usage (using cURL):
# Get the 5 most recent questions for a topic
curl "[BASE_URL]/mcq?grade=4&subject=Science&unit=...&concept=...&is_arabic=true&limit=5"
# Get ALL questions for a topic
curl "[BASE_URL]/mcq?grade=4&subject=Science&unit=...&concept=...&is_arabic=true"
----------------------------------------------------------------------
How to Run This Script
----------------------------------------------------------------------
1. Ensure your FastAPI server is running.
2. Make sure the BASE_URL variable below is set to your server's address.
3. Run the script from your terminal: python3 msq_test.py
"""
import requests
import json
import time
from typing import Optional
# The base URL of your API server.
BASE_URL = "https://voice-agent.caprover.al-arcade.com"
def test_mcq_generation(grade: int, subject: str, unit: str, concept: str, is_arabic: bool, count: int):
    """
    Exercise the POST /mcq/generate endpoint and print the outcome.

    Args:
        grade, subject, unit, concept, is_arabic: topic to generate for.
        count: number of questions to request.

    Returns:
        True when the API answered 200 with a JSON body, False otherwise
        (non-200 status, non-JSON body, or a request-level error).
    """
    # NOTE(review): the test_ prefix plus required parameters means pytest
    # would try to collect this as a test and fail to resolve the arguments
    # as fixtures; it is meant to be called from the __main__ block.
    endpoint = f"{BASE_URL}/mcq/generate"
    payload = {
        "grade": grade,
        "subject": subject,
        "unit": unit,
        "concept": concept,
        "is_arabic": is_arabic,
        "count": count,
    }
    print(f">> Attempting to GENERATE {count} new questions for:")
    print(f" Topic: Grade {grade} {subject} -> {unit} -> {concept}")
    print(f" Language: {'Arabic' if is_arabic else 'English'}")

    try:
        # Generation calls an AI model server-side, hence the long timeout.
        response = requests.post(endpoint, data=payload, timeout=120)
    except requests.exceptions.RequestException as e:
        print(f"FAILED: An error occurred while making the request: {e}")
        return False

    if response.status_code != 200:
        print(f"FAILED: API returned status code {response.status_code}")
        try:
            error_data = response.json()
            print(f" Error Detail: {error_data.get('detail', 'No detail provided.')}")
        except ValueError:
            # Every JSON decode error that response.json() can raise is a
            # ValueError, regardless of the JSON backend in use.
            print(f" Response was not valid JSON: {response.text}")
        return False

    print(f"SUCCESS: API returned status code {response.status_code}")
    try:
        data = response.json()
    except ValueError:
        # A 200 with a non-JSON body is reported as a failure, not a crash.
        print(f" Response was not valid JSON: {response.text}")
        return False

    print(f" Message: {data.get('message')}")
    if 'questions' in data and data['questions']:
        print("\n --- Details of Generated Questions ---")
        for i, q in enumerate(data['questions'], 1):
            print(f" {i}. Question: {q['question_text']}")
            print(f" Correct: {q['correct_answer']}")
            print(f" Wrong 1: {q['wrong_answer_1']}")
            print(f" Wrong 2: {q['wrong_answer_2']}")
            print(f" Wrong 3: {q['wrong_answer_3']}\n")
    return True
def test_mcq_retrieval(grade: int, subject: str, unit: str, concept: str, is_arabic: bool, limit: Optional[int]):
    """
    Exercise the GET /mcq endpoint and print the retrieved questions.

    Args:
        grade, subject, unit, concept, is_arabic: topic to look up.
        limit: maximum number of questions; None omits the parameter so the
            API returns all stored questions.

    Returns:
        True when the API answered 200 with a JSON body, False otherwise
        (non-200 status, non-JSON body, or a request-level error).
    """
    # NOTE(review): the test_ prefix plus required parameters means pytest
    # would try to collect this as a test and fail to resolve the arguments
    # as fixtures; it is meant to be called from the __main__ block.
    endpoint = f"{BASE_URL}/mcq"
    params = {
        "grade": grade,
        "subject": subject,
        "unit": unit,
        "concept": concept,
        "is_arabic": is_arabic,
    }
    if limit is not None:
        params["limit"] = limit
    limit_str = f"up to {limit}" if limit is not None else "ALL"
    print(f">> Attempting to RETRIEVE {limit_str} stored questions for the same topic...")

    try:
        response = requests.get(endpoint, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"FAILED: An error occurred while making the request: {e}")
        return False

    if response.status_code != 200:
        print(f"FAILED: API returned status code {response.status_code}")
        try:
            error_data = response.json()
            print(f" Error Detail: {error_data.get('detail', 'No detail provided.')}")
        except ValueError:
            # Every JSON decode error that response.json() can raise is a
            # ValueError, regardless of the JSON backend in use.
            print(f" Response was not valid JSON: {response.text}")
        return False

    print(f"SUCCESS: API returned status code {response.status_code}")
    try:
        data = response.json()
    except ValueError:
        # A 200 with a non-JSON body is reported as a failure, not a crash.
        print(f" Response was not valid JSON: {response.text}")
        return False

    print(f" Found {data.get('count')} stored questions in the database.")
    if 'questions' in data and data['questions']:
        print("\n --- Details of Retrieved Questions ---")
        for i, q in enumerate(data['questions'], 1):
            print(f" {i}. Question: {q['question_text']}")
            print(f" Correct: {q['correct_answer']}")
            print(f" Wrong 1: {q['wrong_answer_1']}")
            print(f" Wrong 2: {q['wrong_answer_2']}")
            print(f" Wrong 3: {q['wrong_answer_3']}\n")
    elif data.get('count') == 0:
        print(" (This is expected if this is the first time generating questions for this topic)")
    return True
if __name__ == "__main__":
    # Drives two generate-then-retrieve round trips (Arabic, then English)
    # against the server configured in BASE_URL.
    banner = "=" * 50

    # IMPORTANT: Use actual Unit/Concept names from your database for the best results.
    arabic_test_data = {
        "grade": 4,
        "subject": "Science",
        "unit": "الوحدة الأولى: الأنظمة الحية",
        "concept": "المفهوم الأول: التكيف والبقاء",
        "is_arabic": True,
        "count": 3
    }
    english_test_data = {
        "grade": 5,
        "subject": "Science",
        "unit": "Unit 1: Matter and Energy in Ecosystems",
        "concept": "Concept 1.1: Properties of Matter",
        "is_arabic": False,
        "count": 2
    }
    scenarios = (
        ("STARTING TEST 1: ARABIC MCQ GENERATION & RETRIEVAL", arabic_test_data),
        ("STARTING TEST 2: ENGLISH MCQ GENERATION & RETRIEVAL", english_test_data),
    )

    for title, topic in scenarios:
        print("\n" + banner)
        print(title)
        print(banner)
        # Retrieval is only attempted once generation has succeeded.
        if test_mcq_generation(**topic):
            print("-" * 25)
            time.sleep(2)
            # The retrieval helper takes the same topic fields minus "count".
            lookup_args = {key: value for key, value in topic.items() if key != 'count'}
            test_mcq_retrieval(limit=None, **lookup_args)

    print("\n" + banner)
    print("All tests complete.")
    print(banner)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment