Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
67972c15
Commit
67972c15
authored
Dec 04, 2025
by
salma
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
faster and cheaper MCQ generation
parent
ff35d000
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
250 additions
and
144 deletions
+250
-144
mcq_service.py
self_hosted_env/voice_agent/services/mcq_service.py
+148
-91
pgvector_service.py
self_hosted_env/voice_agent/services/pgvector_service.py
+84
-39
setup_mcq_table.py
self_hosted_env/voice_agent/setup_mcq_table.py
+18
-14
No files found.
self_hosted_env/voice_agent/services/mcq_service.py
View file @
67972c15
This diff is collapsed.
Click to expand it.
self_hosted_env/voice_agent/services/pgvector_service.py
View file @
67972c15
...
...
@@ -543,45 +543,7 @@ class PGVectorService:
except
Exception
as
e
:
print
(
f
"❌ Database verification failed: {e}"
)
def insert_mcqs(self, mcq_list: List[Dict]):
    """
    Inserts a batch of MCQs, including the blooms_level field.

    Each dict in ``mcq_list`` is read with ``dict.get``, so a missing key is
    stored as NULL. Nothing is done (and no connection is taken) when
    ``mcq_list`` is empty or None.

    The whole batch is written with one ``executemany`` and committed once.
    If the write fails, the transaction is rolled back before the exception
    is re-raised, so the connection is not handed back to ``pool_handler``
    in an aborted-transaction state.
    """
    if not mcq_list:
        return

    # Single source of truth for the column order: the column list, the
    # placeholders and the row tuples are all derived from it, so they
    # cannot drift apart when a column is added or removed.
    columns = (
        'curriculum', 'grade', 'subject', 'unit', 'concept', 'question_text',
        'question_type', 'difficulty_level', 'blooms_level', 'is_arabic',
        'correct_answer',
        'wrong_answer_1', 'wrong_answer_2', 'wrong_answer_3', 'wrong_answer_4',
        'question_image_url', 'correct_image_url', 'wrong_image_url_1',
        'wrong_image_url_2', 'wrong_image_url_3', 'wrong_image_url_4', 'hint',
    )
    # Only the constant column names above are interpolated into the SQL;
    # every value still travels as a bound %s parameter.
    insert_query = (
        f"INSERT INTO mcq_questions ({', '.join(columns)}) "
        f"VALUES ({', '.join(['%s'] * len(columns))});"
    )
    data_to_insert = [tuple(q.get(col) for col in columns) for q in mcq_list]

    with self.pool_handler.get_connection() as conn:
        try:
            with conn.cursor() as cur:
                cur.executemany(insert_query, data_to_insert)
                conn.commit()
        except Exception:
            # Leave the connection clean, then let the caller see the error.
            conn.rollback()
            raise
    logger.info(f"Successfully inserted {len(mcq_list)} MCQs into the database.")
def
get_mcqs
(
self
,
curriculum
:
str
,
grade
:
str
,
subject
:
str
,
unit
:
str
,
concept
:
str
,
is_arabic
:
bool
,
limit
:
Optional
[
int
]
=
10
)
->
List
[
Dict
]:
"""
...
...
@@ -668,4 +630,87 @@ class PGVectorService:
AND u->>'name' =
%
s
ORDER BY 1;
"""
,
(
curriculum
,
int
(
grade
),
subject
,
unit
))
return
[
row
[
0
]
for
row
in
cur
.
fetchall
()
if
row
[
0
]]
\ No newline at end of file
return
[
row
[
0
]
for
row
in
cur
.
fetchall
()
if
row
[
0
]]
def check_similarity_existence(self, vector: List[float], curriculum: str, concept: str, threshold: float = 0.92) -> bool:
    """
    Returns True if a stored question for the same curriculum and concept
    has cosine similarity to ``vector`` greater than ``threshold``
    (default 0.92).

    Args:
        vector: Embedding of the candidate question. Any sequence of numbers
            is accepted; a ready-made pgvector text literal (str) is passed
            through unchanged.
        curriculum: Value matched against the ``curriculum`` column.
        concept: Value matched against the ``concept`` column.
        threshold: Minimum cosine similarity that counts as a duplicate.

    Returns:
        False when ``vector`` is empty or None, when no row matches, and
        also when the lookup fails: errors are logged and treated as
        "no duplicate found" rather than raised.
    """
    if vector is None or len(vector) == 0:
        return False

    # Convert similarity threshold to distance
    # Cosine Distance = 1 - Cosine Similarity
    # e.g. Similarity > 0.92 means Distance < 0.08
    max_distance = 1.0 - threshold

    try:
        # Build the pgvector text form "[v1,v2,...]" explicitly. str() on the
        # input is only valid for a plain list of Python floats; tuples and
        # NumPy scalars/arrays stringify into something pgvector rejects.
        if isinstance(vector, str):
            vector_literal = vector
        else:
            vector_literal = "[" + ",".join(repr(float(x)) for x in vector) + "]"

        with self.pool_handler.get_connection() as conn:
            with conn.cursor() as cur:
                # Curriculum/concept equality narrows the rows before the
                # distance comparison is evaluated.
                cur.execute(
                    """
                    SELECT 1
                    FROM mcq_questions
                    WHERE curriculum = %s
                    AND concept = %s
                    AND embedding <=> %s < %s
                    LIMIT 1;
                    """,
                    (curriculum, concept, vector_literal, max_distance)
                )
                return cur.fetchone() is not None
    except Exception as e:
        # Deliberate fail-open: a broken lookup must not block generation.
        logger.error(f"Error checking duplicate: {e}")
        return False
def insert_mcqs(self, mcq_list: List[Dict]):
    """
    Inserts MCQs and their Embeddings.

    Each dict in ``mcq_list`` is read with ``dict.get``, so a missing key is
    stored as NULL. The ``embedding`` value may be any sequence of numbers
    or a ready-made pgvector text literal; a missing or empty embedding is
    stored as NULL. Nothing is done (and no connection is taken) when
    ``mcq_list`` is empty or None.

    The whole batch is written with one ``executemany`` and committed once.
    If the write fails, the transaction is rolled back before the exception
    is re-raised, so the connection is not handed back to ``pool_handler``
    in an aborted-transaction state.
    """
    if not mcq_list:
        return

    # Single source of truth for the column order: the column list, the
    # placeholders and the row tuples are all derived from it, so they
    # cannot drift apart when a column is added or removed.
    scalar_columns = (
        'curriculum', 'grade', 'subject', 'unit', 'concept', 'question_text',
        'question_type', 'difficulty_level', 'blooms_level', 'is_arabic',
        'correct_answer',
        'wrong_answer_1', 'wrong_answer_2', 'wrong_answer_3', 'wrong_answer_4',
        'question_image_url', 'correct_image_url', 'wrong_image_url_1',
        'wrong_image_url_2', 'wrong_image_url_3', 'wrong_image_url_4', 'hint',
    )
    all_columns = scalar_columns + ('embedding',)
    # Only the constant column names above are interpolated into the SQL;
    # every value still travels as a bound %s parameter.
    insert_query = (
        f"INSERT INTO mcq_questions ({', '.join(all_columns)}) "
        f"VALUES ({', '.join(['%s'] * len(all_columns))});"
    )

    data_to_insert = []
    for q in mcq_list:
        # Handle embedding format: pgvector accepts the text form
        # "[v1,v2,...]". Build it explicitly, because str() on the value is
        # only valid for a plain list of Python floats (tuples and NumPy
        # scalars/arrays stringify into something pgvector rejects).
        emb = q.get('embedding')
        if isinstance(emb, str):
            emb_val = emb or None
        elif emb is None or len(emb) == 0:
            emb_val = None
        else:
            emb_val = "[" + ",".join(repr(float(x)) for x in emb) + "]"
        data_to_insert.append(
            tuple(q.get(col) for col in scalar_columns) + (emb_val,)
        )

    with self.pool_handler.get_connection() as conn:
        try:
            with conn.cursor() as cur:
                cur.executemany(insert_query, data_to_insert)
                conn.commit()
        except Exception:
            # Leave the connection clean, then let the caller see the error.
            conn.rollback()
            raise
    logger.info(f"Successfully inserted {len(mcq_list)} MCQs with vectors.")
\ No newline at end of file
self_hosted_env/voice_agent/setup_mcq_table.py
View file @
67972c15
# setup_mcq_table.py
import
psycopg2
import
os
from
dotenv
import
load_dotenv
...
...
@@ -7,9 +5,6 @@ from dotenv import load_dotenv
load_dotenv
()
def
setup_mcq_table
(
drop_existing_table
:
bool
=
False
):
"""
Sets up the mcq_questions table with the final schema, now including blooms_level.
"""
try
:
conn
=
psycopg2
.
connect
(
host
=
os
.
getenv
(
"POSTGRES_HOST"
,
"localhost"
),
...
...
@@ -24,10 +19,13 @@ def setup_mcq_table(drop_existing_table: bool = False):
if
drop_existing_table
:
print
(
"Dropping existing mcq_questions table..."
)
cur
.
execute
(
"DROP TABLE IF EXISTS mcq_questions CASCADE;"
)
print
(
"Table dropped."
)
print
(
"Creating mcq_questions table with blooms_level column..."
)
# --- UPDATED SCHEMA ---
print
(
"Creating mcq_questions table..."
)
# 1. Enable the vector extension (Just in case)
cur
.
execute
(
"CREATE EXTENSION IF NOT EXISTS vector;"
)
# 2. Create Table with 'embedding vector(1536)'
cur
.
execute
(
"""
CREATE TABLE IF NOT EXISTS mcq_questions (
id SERIAL PRIMARY KEY,
...
...
@@ -53,24 +51,30 @@ def setup_mcq_table(drop_existing_table: bool = False):
wrong_image_url_3 TEXT,
wrong_image_url_4 TEXT,
hint TEXT,
embedding vector(1536),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""
)
print
(
"Creating indexes on mcq_questions table..."
)
# 3. Create HNSW Index for fast vector search
print
(
"Creating vector index..."
)
cur
.
execute
(
"""
CREATE INDEX IF NOT EXISTS idx_mcq_embedding
ON mcq_questions USING hnsw (embedding vector_cosine_ops);
"""
)
# Standard indexes
cur
.
execute
(
"""
CREATE INDEX IF NOT EXISTS idx_mcq_topic
ON mcq_questions(curriculum, grade, is_arabic, subject, unit, concept);
"""
)
print
(
"MCQ table setup complete."
)
print
(
"MCQ table setup complete
with Vector support
."
)
except
Exception
as
e
:
print
(
f
"
An error occurred during MCQ table setup
: {e}"
)
print
(
f
"
Error
: {e}"
)
finally
:
if
'conn'
in
locals
()
and
conn
:
conn
.
close
()
print
(
"Database connection closed."
)
if
'conn'
in
locals
()
and
conn
:
conn
.
close
()
if
__name__
==
"__main__"
:
print
(
"Setting up the MCQ table structure..."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment