Commit 60ce4a8c authored by salma

Faster and cheaper MCQ generation

parent 67972c15
...@@ -98,12 +98,16 @@ class MCQService: ...@@ -98,12 +98,16 @@ class MCQService:
# Slice to exact number requested (in case we generated extras) # Slice to exact number requested (in case we generated extras)
final_set = accepted_mcqs[:num_questions] final_set = accepted_mcqs[:num_questions]
self.pgvector.insert_mcqs(final_set) self.pgvector.insert_mcqs(final_set)
for q in final_set:
q.pop('embedding', None)
return final_set return final_set
# If we failed to generate enough # If we failed to generate enough
if not accepted_mcqs: if not accepted_mcqs:
logger.warning(f"Could not generate unique questions for {concept} after {attempts} attempts.") logger.warning(f"Could not generate unique questions for {concept} after {attempts} attempts.")
return [] return []
return accepted_mcqs return accepted_mcqs
......
...@@ -546,31 +546,35 @@ class PGVectorService: ...@@ -546,31 +546,35 @@ class PGVectorService:
def get_mcqs(self, curriculum: str, grade: str, subject: str, unit: str, concept: str, is_arabic: bool, limit: Optional[int] = 10) -> List[Dict]:
    """
    Retrieve stored MCQs for a specific topic and language, newest first.

    Rows are read from ``mcq_questions`` and must match every one of
    ``curriculum``, ``grade``, ``subject``, ``unit``, ``concept`` and
    ``is_arabic``. Results are ordered by ``created_at`` descending.

    Args:
        curriculum, grade, subject, unit, concept: Exact-match filter values.
        is_arabic: Language flag to filter on.
        limit: Maximum number of rows to return. If ``None``, no LIMIT
            clause is added and all matching questions are returned.

    Returns:
        The fetched rows (dict-style rows from ``RealDictCursor``), each with
        its ``'embedding'`` key removed to prevent JSON serialization errors
        with ndarrays.
    """
    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            query = """
                SELECT *
                FROM mcq_questions
                WHERE curriculum = %s AND grade = %s AND subject = %s AND unit = %s AND concept = %s AND is_arabic = %s
                ORDER BY created_at DESC
            """
            params = (curriculum, grade, subject, unit, concept, is_arabic)

            # The limit is passed as a bound parameter, never interpolated
            # into the SQL text.
            if limit is not None:
                query += " LIMIT %s;"
                params += (limit,)
            else:
                query += ";"

            cur.execute(query, params)
            results = cur.fetchall()

            # Strip the vector column in place; pop() with a default keeps
            # this safe for rows that do not carry an 'embedding' key.
            for row in results:
                row.pop('embedding', None)
            return results
def get_distinct_curricula_from_structure(self) -> List[str]: def get_distinct_curricula_from_structure(self) -> List[str]:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment