faster and cheaper mcq generation

60ce4a8c · salma · 67972c15 · 60ce4a8c · 60ce4a8c
Commit 60ce4a8c authored Dec 04, 2025 by salma
Hide whitespace changes
Inline Side-by-side

Showing with 32 additions and 24 deletions

mcq_service.py self_hosted_env/voice_agent/services/mcq_service.py +4 -0

pgvector_service.py self_hosted_env/voice_agent/services/pgvector_service.py +28 -24

No files found.
--- a/self_hosted_env/voice_agent/services/mcq_service.py
+++ b/self_hosted_env/voice_agent/services/mcq_service.py
@@ -98,12 +98,16 @@ class MCQService:
            # Slice to exact number requested (in case we generated extras)
            final_set = accepted_mcqs[:num_questions]
            self.pgvector.insert_mcqs(final_set)
+            for q in final_set:
+                q.pop('embedding', None)
            return final_set
        # If we failed to generate enough
        if not accepted_mcqs:
            logger.warning(f"Could not generate unique questions for {concept} after {attempts} attempts.")
            return []
        return accepted_mcqs

--- a/self_hosted_env/voice_agent/services/pgvector_service.py
+++ b/self_hosted_env/voice_agent/services/pgvector_service.py
@@ -546,31 +546,35 @@ class PGVectorService:
    def get_mcqs(self, curriculum: str, grade: str, subject: str, unit: str, concept: str, is_arabic: bool, limit: Optional[int] = 10) -> List[Dict]:
-        """
+            """
-        Retrieves MCQs for a specific topic and language, now filtering by curriculum.
+            Retrieves MCQs for a specific topic and language.
-        If limit is None, it retrieves all matching questions.
+            Removes the 'embedding' field to prevent JSON serialization errors with ndarrays.
-        """
+            """
-        with self.pool_handler.get_connection() as conn:
+            with self.pool_handler.get_connection() as conn:
-            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                with conn.cursor(cursor_factory=RealDictCursor) as cur:
-                # --- UPDATED SELECT AND WHERE CLAUSE ---
+                    query = """
-                query = """
+                        SELECT *
-                    SELECT *
+                        FROM mcq_questions
-                    FROM mcq_questions
+                        WHERE curriculum = %s AND grade = %s AND subject = %s AND unit = %s AND concept = %s AND is_arabic = %s
-                    WHERE curriculum = %s AND grade = %s AND subject = %s AND unit = %s AND concept = %s AND is_arabic = %s
+                        ORDER BY created_at DESC
-                    ORDER BY created_at DESC
+                    """
-                """
+                    params = (curriculum, grade, subject, unit, concept, is_arabic)
-                params = (curriculum, grade, subject, unit, concept, is_arabic)
+                    if limit is not None:
-                if limit is not None:
+                        query += " LIMIT %s;"
-                    query += " LIMIT %s;"
+                        params += (limit,)
-                    params += (limit,)
+                    else:
-                else:
+                        query += ";"
-                    query += ";"
-                cur.execute(query, params)
+                    cur.execute(query, params)
-                return cur.fetchall()
+                    results = cur.fetchall()
+                    for row in results:
+                        row.pop('embedding', None)
+                    return results
    def get_distinct_curricula_from_structure(self) -> List[str]: