Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AI Tutor
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Salma Mohammed Hamed
AI Tutor
Commits
cea5383f
Commit
cea5383f
authored
Sep 21, 2025
by
SalmaMohammedHamedMustafa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
context-aware retrieval
parent
9be6da83
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1333 additions
and
244 deletions
+1333
-244
All_Curriculums_grouped.json
self_hosted_env/voice_agent/All_Curriculums_grouped.json
+180
-0
__init__.py
self_hosted_env/voice_agent/core/__init__.py
+1
-1
enums.py
self_hosted_env/voice_agent/core/enums.py
+5
-0
curriculum_structure.py
self_hosted_env/voice_agent/curriculum_structure.py
+271
-0
agent_service.py
self_hosted_env/voice_agent/services/agent_service.py
+475
-234
chat_database_service.py
..._hosted_env/voice_agent/services/chat_database_service.py
+69
-7
pgvector_service.py
self_hosted_env/voice_agent/services/pgvector_service.py
+330
-2
start.sh
self_hosted_env/voice_agent/start.sh
+2
-0
No files found.
self_hosted_env/voice_agent/All_Curriculums_grouped.json
0 → 100644
View file @
cea5383f
{
"Grade 4 Arabic curriculum"
:
{
"الوحدة الأولى: الأنظمة الحية"
:
{
"مقدمة"
:
[
9
,
12
],
"المفاهيم"
:
{
"المفهوم 1.1: التكيف والبقاء"
:
{},
"المفهوم 2.1: كيف تعمل الحواس؟"
:
{},
"المفهوم 3.1: الضوء وحاسة البصر"
:
{}
},
"مشروع الوحدة"
:
[
63
,
64
],
"المشروع بيني التخصصات"
:
[
65
,
72
],
"قيم تعلمك"
:
[
73
,
74
]
},
"الوحدة الثانية: الحركة والطاقة"
:
{
"مقدمة"
:
[
75
,
78
],
"المفاهيم"
:
{
"المفهوم 1.2: الحركة والتوقف"
:
{},
"المفهوم 2.2: الطاقة والحركة"
:
{},
"المفهوم 3.2: الطاقة والتصادم"
:
{}
},
"مشروع الوحدة"
:
[
121
,
122
],
"قيم تعلمك"
:
[
123
,
124
],
"السلامة في فصول العلوم"
:
[
125
,
126
]
}
},
"Grade 5 English curriculum"
:
{
"Unit 1: Interactions of Organisms"
:
{
"Get Started"
:
[
10
,
13
],
"Concepts"
:
{
"Concept 1.1 Plant Needs"
:
{},
"Concept 1.2 Energy Flow in Ecosystems"
:
{},
"Concept 1.3 Changes in Food Webs"
:
{}
},
"Unit Project"
:
[
59
,
59
],
"Interdisciplinary project"
:
[
60
,
67
],
"Assess your learning"
:
[
68
,
69
]
},
"Unit 2: Particles in Motion"
:
{
"Get Started"
:
[
70
,
73
],
"Concepts"
:
{
"Concept 2.1 Matter in the World around Us"
:
{},
"Concept 2.2 Describing and Measuring Matter"
:
{},
"Concept 2.3 Comparing Changes in Matter"
:
{}
},
"Unit Project"
:
[
124
,
125
],
"Assess your learning"
:
[
126
,
127
]
}
},
"Grade 6 Arabic curriculum"
:
{
"الوحدة الأولى: ما النظام؟"
:
{
"مقدمة"
:
[
8
,
11
],
"المفاهيم"
:
{
"المفهوم 1.1: الخليه كنظام"
:
{},
"المفهوم 2.1: الجسم كنظام"
:
{},
"المفهوم 3.1: الطاقة كنظام"
:
{}
},
"مشروع الوحدة"
:
[
73
,
75
],
"تقييم الوحدة"
:
[
76
,
79
]
},
"الوحدة الثانية: الحصول على الطاقة"
:
{
"مقدمة"
:
[
80
,
83
],
"المفاهيم"
:
{
"المفهوم 1.2: الطاقة الحرارية وحالات المادة"
:
{},
"المفهوم 2.2: انتقال الحرارة"
:
{}
},
"مشروع الوحدة"
:
[
120
,
121
],
"المشروع بيني التخصصات"
:
[
122
,
130
],
"تقييم الوحدة"
:
[
131
,
134
]
}
},
"Grade 6 English curriculum"
:
{
"Unit 1: What is system"
:
{
"Get Started"
:
[
9
,
12
],
"Concepts"
:
{
"Concept 1.1 The cell as a system"
:
{},
"Concept 1.2 The body as a system"
:
{},
"Concept 1.3 Energy as a system"
:
{}
},
"Unit Project"
:
[
74
,
76
],
"Unit assessment"
:
[
77
,
80
]
},
"Unit 2: Getting energy"
:
{
"Get Started"
:
[
81
,
84
],
"Concepts"
:
{
"Concept 2.1 Thermal energy and states of matter"
:
{},
"Concept 2.2 Heat transfer"
:
{}
},
"Unit Project"
:
[
121
,
122
],
"Interdisciplinary project"
:
[
123
,
131
],
"Unit assessment"
:
[
132
,
135
]
}
}
}
\ No newline at end of file
self_hosted_env/voice_agent/core/__init__.py
View file @
cea5383f
from
.enums
import
MessageType
,
ResponseStatus
,
StudentNationality
,
Models
from
.enums
import
MessageType
,
ResponseStatus
,
StudentNationality
,
Models
,
StudyLanguage
from
.config
import
AppConfig
\ No newline at end of file
self_hosted_env/voice_agent/core/enums.py
View file @
cea5383f
...
...
@@ -21,3 +21,8 @@ class Models(str, Enum):
tts
=
"gpt-4o-mini-tts"
embedding
=
"text-embedding-3-small"
transcription
=
"gpt-4o-transcribe"
class StudyLanguage(Enum):
    """Study-language options for a student.

    Elsewhere in this commit the value is read from a student's
    ``study_language`` field and combined with the student's nationality
    to select a system prompt.
    """

    # Student studies the Arabic-language curriculum.
    ARABIC = "ARABIC"
    # Student studies the English-language curriculum.
    ENGLISH = "ENGLISH"
\ No newline at end of file
self_hosted_env/voice_agent/curriculum_structure.py
0 → 100644
View file @
cea5383f
import
psycopg2
import
json
import
os
from
dotenv
import
load_dotenv
load_dotenv
()
# Load curriculum data from JSON file
def load_curriculum_from_json(json_file_path):
    """Read the curriculum JSON file and return its decoded content.

    Args:
        json_file_path: Path of the JSON file to read (opened as UTF-8 text).

    Returns:
        The Python object decoded from the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(json_file_path, mode='r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)
# Convert JSON structure to database format
def convert_json_to_db_format(json_data):
    """Convert the grouped curriculum JSON into per-curriculum database records.

    Args:
        json_data: Mapping of curriculum name -> mapping of section name ->
            section content, as loaded from the curriculum JSON file.

    Returns:
        A dict keyed by ``(grade, is_arabic, subject)``. Each value is a dict
        with the keys ``grade``, ``subject``, ``language``, ``title`` and
        ``units``; every unit carries ``number``, ``name``, ``description``
        and a ``concepts`` list. Curricula whose name matches none of the
        known markers are skipped.
    """
    # Marker substring of the curriculum name -> (grade, is_arabic).
    # Checked in order; the first marker found in the name wins.
    known_curricula = (
        ("Grade 4 Arabic", 4, True),
        ("Grade 5 English", 5, False),
        ("Grade 6 Arabic", 6, True),
        ("Grade 6 English", 6, False),
    )
    # Top-level sections whose (lower-cased) name contains one of these are
    # not units (introductions, projects, assessments, safety pages).
    non_unit_keywords = (
        'مقدمة', 'get started', 'مشروع', 'project', 'قيم',
        'assess', 'تقييم', 'سلامة', 'safety',
    )
    # The concepts mapping is stored under an Arabic or an English key;
    # the Arabic key takes precedence when both are present.
    concept_keys = ("المفاهيم", "Concepts")
    subject = "Science"

    curriculum_data = {}

    for curriculum_name, curriculum_content in json_data.items():
        matched = next(
            ((grade, arabic) for marker, grade, arabic in known_curricula
             if marker in curriculum_name),
            None,
        )
        if matched is None:
            continue  # Skip unknown curriculum formats
        grade, is_arabic = matched

        language = "Arabic" if is_arabic else "English"
        title = f"منهج العلوم للصف {grade}" if is_arabic else f"Science Curriculum for Grade {grade}"

        units = []
        for unit_name, unit_content in curriculum_content.items():
            lowered_name = unit_name.lower()
            if any(keyword in lowered_name for keyword in non_unit_keywords):
                continue

            # Units are numbered in the order they are kept, starting at 1.
            unit_number = len(units) + 1

            # Only a mapping can carry a concepts section; anything else
            # (page ranges, scalars) yields a unit without concepts instead
            # of failing on the membership test.
            concept_names = ()
            if isinstance(unit_content, dict):
                for key in concept_keys:
                    if key in unit_content:
                        concept_names = unit_content[key]
                        break

            concepts = [
                {
                    "number": f"{unit_number}.{concept_number}",
                    "name": concept_name,
                    "description": concept_name,
                    "lessons": [],  # We don't have lesson details in the JSON
                }
                for concept_number, concept_name in enumerate(concept_names, start=1)
            ]

            units.append({
                "number": unit_number,
                "name": unit_name,
                "description": f"وحدة {unit_number}" if is_arabic else f"Unit {unit_number}",
                "concepts": concepts,
            })

        curriculum_data[(grade, is_arabic, subject)] = {
            "grade": grade,
            "subject": subject,
            "language": language,
            "title": title,
            "units": units,
        }

    return curriculum_data
def setup_curriculum_database(json_file_path: str, drop_existing_table: bool = False):
    """
    Sets up the curriculum structure table and populates it with data from JSON file.

    Connection settings are read from the POSTGRES_HOST, POSTGRES_PORT,
    POSTGRES_USER, POSTGRES_PASSWORD and POSTGRES_DB environment variables.
    All failures are reported on stdout; nothing is raised to the caller.

    Args:
        json_file_path: Path to the JSON file containing curriculum data
        drop_existing_table: If True, drops the existing table before creating it.
    """
    # Initialised up front so the cleanup in ``finally`` can test it directly,
    # even when loading the JSON or connecting fails.
    conn = None
    try:
        # Load and convert JSON data
        json_data = load_curriculum_from_json(json_file_path)
        curriculum_data = convert_json_to_db_format(json_data)

        conn = psycopg2.connect(
            host=os.getenv("POSTGRES_HOST", "localhost"),
            port=os.getenv("POSTGRES_PORT", "5432"),
            user=os.getenv("POSTGRES_USER"),
            password=os.getenv("POSTGRES_PASSWORD"),
            dbname=os.getenv("POSTGRES_DB")
        )
        # Every statement is committed on its own, so one failed insert
        # does not roll back the rows written before it.
        conn.autocommit = True

        with conn.cursor() as cur:
            if drop_existing_table:
                print("Dropping existing curriculum_structure table...")
                cur.execute("DROP TABLE IF EXISTS curriculum_structure CASCADE;")
                print("Table dropped successfully.")

            print("Creating curriculum_structure table...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS curriculum_structure (
                    id SERIAL PRIMARY KEY,
                    grade INTEGER NOT NULL,
                    is_arabic BOOLEAN NOT NULL,
                    subject VARCHAR(100) NOT NULL DEFAULT 'Science',
                    curriculum_data JSONB NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(grade, is_arabic, subject)
                );
            """)

            # Create indexes for better performance
            cur.execute("CREATE INDEX IF NOT EXISTS idx_curriculum_grade_lang ON curriculum_structure(grade, is_arabic);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_curriculum_subject ON curriculum_structure(subject);")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_curriculum_grade_subject ON curriculum_structure(grade, subject);")
            print("Table and indexes created successfully.")

            print("Inserting curriculum data from JSON...")
            # Insert curriculum data for all combinations; an existing row for
            # the same (grade, is_arabic, subject) is overwritten.
            for (grade, is_arabic, subject), curriculum in curriculum_data.items():
                language_desc = "Arabic" if is_arabic else "English"
                try:
                    cur.execute(
                        """
                        INSERT INTO curriculum_structure (grade, is_arabic, subject, curriculum_data)
                        VALUES (%s, %s, %s, %s)
                        ON CONFLICT (grade, is_arabic, subject)
                        DO UPDATE SET
                            curriculum_data = EXCLUDED.curriculum_data,
                            created_at = CURRENT_TIMESTAMP;
                        """,
                        (grade, is_arabic, subject, json.dumps(curriculum))
                    )
                    print(f"✅ Inserted Grade {grade} {language_desc} {subject} curriculum")
                except Exception as e:
                    # Best effort: report the failing curriculum and keep going.
                    print(f"❌ Error inserting Grade {grade} {language_desc} {subject}: {e}")
                    continue

            print("\nVerifying inserted data...")
            cur.execute("SELECT grade, is_arabic, subject, created_at FROM curriculum_structure ORDER BY grade, is_arabic;")
            results = cur.fetchall()

            print(f"\n📊 Summary: {len(results)} curriculum records inserted")
            for row in results:
                grade, is_arabic, subject, created_at = row
                language = "Arabic" if is_arabic else "English"
                print(f" • Grade {grade} - {language} - {subject} (Created: {created_at.strftime('%Y-%m-%d %H:%M')})")

            # Test curriculum retrieval
            print("\n🧪 Testing curriculum retrieval...")
            cur.execute("SELECT curriculum_data FROM curriculum_structure WHERE grade = 4 AND is_arabic = true LIMIT 1;")
            test_result = cur.fetchone()
            if test_result:
                # NOTE(review): relies on the driver returning the JSONB
                # column already decoded to a dict — confirm.
                test_curriculum = test_result[0]
                unit_count = len(test_curriculum.get('units', []))
                print(f"✅ Test successful: Grade 4 Arabic curriculum has {unit_count} units")

                # Show first unit as example
                if unit_count > 0:
                    first_unit = test_curriculum['units'][0]
                    print(f" First unit: {first_unit['name']} with {len(first_unit.get('concepts', []))} concepts")
            else:
                print("❌ Test failed: Could not retrieve test curriculum")

    except psycopg2.OperationalError as e:
        print(f"❌ Database connection failed: {e}")
    except FileNotFoundError as e:
        print(f"❌ JSON file not found: {e}")
    except json.JSONDecodeError as e:
        print(f"❌ Invalid JSON format: {e}")
    except Exception as e:
        print(f"❌ An error occurred: {e}")
    finally:
        if conn is not None:
            conn.close()
            print("\n🔐 Database connection closed.")
def verify_curriculum_structure():
    """Verify the curriculum structure and show sample data

    Connects with the POSTGRES_* environment variables, reads every row of
    ``curriculum_structure`` and prints, per curriculum, its title, its units
    and the first two concepts of each unit. Failures are printed, not raised.
    """
    # Initialised up front so ``finally`` can close it without inspecting
    # ``locals()`` when the connection attempt itself fails.
    conn = None
    try:
        conn = psycopg2.connect(
            host=os.getenv("POSTGRES_HOST", "localhost"),
            port=os.getenv("POSTGRES_PORT", "5432"),
            user=os.getenv("POSTGRES_USER"),
            password=os.getenv("POSTGRES_PASSWORD"),
            dbname=os.getenv("POSTGRES_DB")
        )

        with conn.cursor() as cur:
            print("📋 Curriculum Structure Verification")
            print("=" * 50)

            # Get all curriculum records
            cur.execute("""
                SELECT grade, is_arabic, subject, curriculum_data, created_at
                FROM curriculum_structure
                ORDER BY grade, is_arabic;
            """)

            for grade, is_arabic, subject, curriculum_data, created_at in cur.fetchall():
                language = "العربية" if is_arabic else "English"

                print(f"\n📚 Grade {grade} - {language} - {subject}")
                print(f" Created: {created_at.strftime('%Y-%m-%d %H:%M')}")
                # NOTE(review): relies on curriculum_data arriving as a dict
                # (JSONB decoded by the driver) — confirm.
                print(f" Title: {curriculum_data.get('title', 'N/A')}")

                units = curriculum_data.get('units', [])
                print(f" Units ({len(units)}):")

                for unit in units:
                    unit_name = unit.get('name', 'N/A')
                    concepts = unit.get('concepts', [])
                    print(f" • Unit {unit.get('number', '?')}: {unit_name} ({len(concepts)} concepts)")

                    for concept in concepts[:2]:  # Show first 2 concepts
                        concept_name = concept.get('name', 'N/A')
                        lessons = concept.get('lessons', [])
                        print(f" - Concept {concept.get('number', '?')}: {concept_name} ({len(lessons)} lessons)")

                    if len(concepts) > 2:
                        print(f" ... and {len(concepts) - 2} more concepts")

    except Exception as e:
        print(f"❌ Verification failed: {e}")
    finally:
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    print("🚀 Setting up Curriculum Structure Database from JSON")
    print("=" * 60)

    # Path to the JSON file. It is resolved next to this script rather than
    # against the current working directory, so the setup also works when the
    # script is launched from another directory.
    json_file_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "All_Curriculums_grouped.json",
    )

    # Setup curriculum database with JSON data.
    # NOTE: drop_existing_table=True recreates the table on every run.
    setup_curriculum_database(json_file_path, drop_existing_table=True)

    print("\n" + "=" * 60)
    print("🔍 Verifying Setup")

    # Verify the setup
    verify_curriculum_structure()
\ No newline at end of file
self_hosted_env/voice_agent/services/agent_service.py
View file @
cea5383f
...
...
@@ -8,86 +8,177 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from
core
import
StudentNationality
,
Models
from
services.pgvector_service
import
PGVectorService
from
services.openai_service
import
OpenAIService
from
services.chat_database_service
import
ChatDatabaseService
from
services.chat_database_service
import
ChatDatabaseService
,
StudyLanguage
from
services.pedagogy_service
import
PedagogyService
from
services.connection_pool
import
ConnectionPool
logger
=
logging
.
getLogger
(
__name__
)
SYSTEM_PROMPTS
:
Dict
[
StudentNationality
,
str
]
=
{
StudentNationality
.
EGYPTIAN
:
"""
إنت مُدرّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
فقط لو الطِّفل سأل عن هويتك بصراحة وواضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
# Enhanced system prompts (keeping existing ones but adding curriculum awareness instructions)
ENHANCED_SYSTEM_PROMPTS
:
Dict
[
tuple
,
str
]
=
{
# ---------- Egyptian + Arabic ----------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ARABIC
):
"""
إنك مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
فقط لو الطفّل سأل عن هويتك بصراحة ووضح (مثل "إنت مين؟"، "عرّفني بنفسك"، "إنت بتعمل إيه هنا؟")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
لو سأل أسئلة عامة زي "نت عارف انا مين؟" أو "إزيك؟" أو "شكرا"، رد بطريقة طبيعية ودودة باستخدام اسمه {student_name}.
أما لو سأل عن أي حاجة في العلوم أو المنهج، اشرح له بالطريقة التعليمية المناسبة.
ملاحظة مُلزِمة: كلمة "منصّة" لازم تكتبها دايمًا كده بالظبط: **مَنَصّة** (بالفتحة على الميم والنون)،
عشان الـTTS يِنطِقها صح.
في باقي الردود، رُد باللهجة المصريّة الطبيعيّة كأنّك بتكَلِّم {student_name} قصادك.
خَلّي الكلام بسيط، واضح، وقَريب من وُدنه.
الجُمَل قُصَيَّرة ومُترابطة، مِش مَقطَّعة.
اشرح كأنّك بتحكي له حِكاية أو بتورّيه حاجَة من الحَياة حَوالينا، مِش بتِقرا من كتاب.
مُمكِن تِذكُر اسم {student_name} مَرّة واحِدة في أوِّل الرّد فَقَط.
بعد كِدا مَمنوع تِكرار الاسم في نَفس الرّد، حَتّى في الأسئِلة الخِتاميّة.
مَمنوع تِستَعمِل أي ألقاب زي "يا بَطَل" أو "يا شاطِر"، الاسم الأوَّل بَس.
ولو الرّد قُصَيَّر جِدًّا (جُملة أو اتنِين)، مُمكِن تِستَغنَى عن ذِكر الاسم خالِص.
لو فيه مُصطَلَح صَعب، فَسَّره بِكِلمة أسهَل.
لو فيه رَمز كيمياوي زي H2O أو CO2، اكتُبه زي ما هو.
الأرقام العادِيّة اكتُبها بالحُروف العربي زي "اِتنِين" أو "تَلاتة".
اِستَخدِم التَّشكِيل الكامِل على كُلّ الكَلام عَشان يِطْلَع بالصِّوت زي نُطق اللّهجة المِصريّة الطَّبيعي.
لو {student_name} مكتوب بالإنجليزي، اكتُبه دايمًا بالعَربي في رُدودك.
لَمّا تِذكُر الصف {grade}، قُله بالطريقة الطبيعيّة زي ما الأطفال بيقولوها: الصف 4 = سنة رابعة ابتدائي، الصف 5 = سنة خامسة ابتدائي، وهكذا.
الهَدف: رَد قُصَيَّر يِعلِّم ويُوصَّل المَعلومة، ويِبان إن "عَنان" بيِشرَح للطفل جوّه مَنَصّة "شارِع العُلوم"، مِش كتاب بيتقري.
**عن المنهج وطريقة الشرح:**
- إنت عارف المنهج المصري للصف {grade} كويس من ملف الـ JSON.
- لو الطّفل سأل بشكل عام ("إحنا هناخد إيه؟"، "وريني المنهج"، "إيه المواضيع؟")، اعرضله المنهج بشكل واضح ومنظّم.
- لو سأل عن حاجة مُعيّنة، استخدم السياق من المنهج واربطها بالوحدات والأفكار اللي ليها علاقة.
- دايمًا قول مكان الدرس فين: "ده في الوحدة الأولى، المفهوم التاني".
- وضّح الروابط: "ده ليه علاقة باللي اتعلمناه عن..." أو "ده هيوصلنا للي هنتعلمه بعد كده عن...".
ملاحظة مُلزمة: كلمة "منصّة" لازم تكتبها دايماً كده بالضبط: **مَنَصّة** (بالفتحة على الميم والنون)،
عشان الـTTS ينطقها صح.
في باقي الردود، رَد باللهجة المصريّة الطبيعيّة كأنّك بتكَلّم {student_name} قصادك.
خَلّي الكلام بسيط، واضح، وقَريب من وجدنه.
الجُملَ قُصيرَة ومُترابطة، مُش مَقطَّعة.
اشرح كأنّك بتحكي له حكاية أو بتوريّه حاجَة من الحَياة حوالينا، مُش بتقرا من كتاب.
مُمكن تُذكر اسم {student_name} مَرّة واحدة في أوّل الرّد فَقَط.
بعد كدا مَمنوع تكرار الاسم في نَفس الرّد، حَتّى في الأسئلة الختاميّة.
مَمنوع تستَعملُ أي ألقاب زي "يا بَطَل" أو "يا شاطر"، الاسم الأوَّل بَس.
ولو الرّد قُصيرَ جدّاً (جملة أو اتنين)، مُمكن تستَغنَى عن ذكر الاسم خالص.
لو فيه مُصطَلَح صَعب، فَسّره بكلمة أسهَل.
لو فيه رَمز كيمياوي زي H2O أو CO2، اكتبه زي ما هو.
الأرقام العاديّة اكتبها بالحروف العربي زي "اتنين" أو "تَلاتة".
استخدمُ التشكيل الكامل على كُلّ الكلام عَشان يطّلع بالصّوت زي نُطق اللّهجة المصريّة الطَبيعيّ.
لو {student_name} مكتوب بالإنجليزي، اكتبه دايماً بالعَربي في ردودك.
لَمّا تُذكر الصف {grade}، قُله بالطريقة الطبيعيّة زي ما الأطفال بيقولوها: الصف 4 = سنة رابعة ابتدائي، الصف 5 = سنة خامسة ابتدائي، وهكذا.
الهَدف: رَد قُصيرَ يُعلِّم ويُوصَّل المَعلومة، ويُبان إن "عَنان" بيشرَح للطفل جوّه مَنَصّة "شارِع العلوم"، مُش كتاب بيتقري.
"""
,
StudentNationality
.
SAUDI
:
"""
إنت مُعلّم لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
فقط لو الطفل سأل عن هويتك بصراحة وو
اضح (مثل "إنت مين؟"، "عر
ّفني بنفسك"، "إنت وش تسوي هنا؟")،
# ---------- Saudi + Arabic ----------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ARABIC
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
فقط لو الطفل سأل عن هويتك بصراحة وو
ضح (مثل "إنت مين؟"، "عرِ
ّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنص الثابت هذا:
"أنا عَنان مؤسِّس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
لو سأل أسئلة عامة مثل "نت عارف انا مين؟" أو "كيفك؟" أو "شكرا"، رد بطريقة طبيعية ودودة باستخدام اسمه {student_name}.
أما لو سأل عن أي شيء في العلوم أو المنهج، اشرح له بالطريقة التعليمية المناسبة.
**عن المنهج وطريقة الشرح:**
- إنت تعرف المنهج السعودي للصف {grade} مضبوط من ملف الـ JSON.
- لو الولد سأل بشكل عام ("وش بندرس؟"، "ورني المنهج"، "إيش المواضيع؟")، عرّض له المنهج بشكل واضح ومرتب.
- لو سأل عن موضوع محدد، استخدم السياق من المنهج واربطه بالوحدات والأفكار اللي لها علاقة.
- دايمًا قول وين مكان الدرس: "هذا في الوحدة الأولى، المفهوم الثاني".
- وضّح الترابط: "هذا مرتبط باللي تعلمناه عن..." أو "هذا يودينا للي راح نتعلمه بعدين عن...".
ملاحظة مُلز
ِمة: كلمة "منصّة" لازم تكتبها دايمًا كده بالظ
بط: **مَنَصّة** (بالفتحة على الميم والنون)،
عشان الـTTS ي
ِنطِ
قها صح.
ملاحظة مُلز
مة: كلمة "منصّة" لازم تكتبها دايماً كده بالض
بط: **مَنَصّة** (بالفتحة على الميم والنون)،
عشان الـTTS ي
نط
قها صح.
في باقي الردود، رَد باللهجة السعوديّة الطبيعيّة، كأنك تشرح له قدّامك.
خل الشرح واضح وسهل، لكن لا يكون ناشف.
اشرح كأنك تسولف معه وتشبّه بأشياء من حياته اليومية.
اشرح كأنك تسولف معه وتشبّه بأشياء من حياته اليومي
ّ
ة.
اذكر اسم {student_name} مرّة وحدة فقط في بداية الرد.
بعد كذا لا تكرره في النص ولا في الأسئلة الختامية.
بعد كذا لا تكرره في النص ولا في الأسئلة الختامي
ّ
ة.
ممنوع تستخدم أي ألقاب مثل "يا بطل" أو "يا شاطر"، الاسم الأول يكفي.
ولو الرد قصير جد
ًا (جملة أو جملتين)، تقدر ما تذكر الاسم أبدًا
.
ولو الرد قصير جد
اً (جملة أو جملتين), تقدر ما تذكر الاسم أبداً
.
لو فيه مصطلح صعب، فسّره بكلمة أبسط.
الرموز الكيميا
ئ
ية مثل H2O أو CO2 تكتب مثل ما هي.
لو فيه مصطلح صعب، فس
ِ
ّره بكلمة أبسط.
الرموز الكيميا
و
ية مثل H2O أو CO2 تكتب مثل ما هي.
الأرقام في الكلام العادي تكتبها بالحروف العربي زي "اثنين" أو "ثلاثة".
استخدم التشكيل بس على الكلمات اللي ممكن الـTTS يخبّص فيها أو يقرأها خطأ، واترك الباقي بدون تشكيل عشان يطلع طبيعي.
لو {student_name} مكتوب بالإنجليزي، اكتبه دايمًا بالعربي في ردودك.
لو {student_name} مكتوب بالإنجليزي، اكتبه دايماً بالعربي في ردودك.
لما تذكر الصف {grade}، قولها بالطريقة اللي الطفل متعود يسمعها: الصف 4 = رابع ابتدائي، الصف 5 = خامس ابتدائي، وهكذا.
الهدف: رد مبسِّط، قريب، ويبيِّن إن "عَنان" يشرح للطفل جوّه مَنَصّة "شارع العلوم"، مو يقرأ من كتاب.
"""
,
# -------- Egyptian English --------
(
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}. لو الطفّل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك")،
رُد بالنصّ الثابت ده:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
لو سأل أسئلة عامة (زي "إزيك؟"، "شكراً")، رد بطريقة طبيعية ودودة باستخدام اسمه {student_name}.
**عن المنهج وطريقة الشرح:**
- إنت عارف المنهج المصري للصف {grade} كويس من ملف الـ JSON.
- لو الطّفل سأل بشكل عام ("إحنا هناخد إيه؟"، "وريني المنهج"، "إيه المواضيع؟")، اعرضله المنهج بشكل واضح ومنظّم.
- لو سأل عن حاجة مُعيّنة، استخدم السياق من المنهج واربطها بالوحدات والأفكار اللي ليها علاقة.
- دايمًا قول مكان الدرس فين: "ده في الوحدة الأولى، المفهوم التاني".
- وضّح الروابط: "ده ليه علاقة باللي اتعلمناه عن..." أو "ده هيوصلنا للي هنتعلمه بعد كده عن...".
بالنسبة لأسئلة العلوم أو المنهج:
- Always answer **in English first**.
- After answering, ask: *"اشرحهالك بالعربي اوّ بشكل ابسط؟"*
- If the child says yes (or asks in Arabic), then give a **mixed explanation**
(**English for terminologies + simple Arabic for explanation**).
احرص إن الشرح يكون بسيط، قصير، واضح، وكأنك بتحكي له من الحياة اليومية.
اذكر اسم {student_name} مرة واحدة بس في بداية الرد. متكررهوش تاني.
ممنوع تستخدم ألقاب زي "يا بطل" أو "يا شاطر".
لو الرد قصير جداً (جملة أو اتنين) ممكن تستغنى عن الاسم.
لما تذكر الصف {grade}، قولها بالطريقة اللي الأطفال المصريين بيقولوها:
الصف 4 = سنة رابعة ابتدائي، الصف 5 = سنة خامسة ابتدائي، وهكذا.
المصطلحات العلميّة: سيبها بالإنجليزي (**roots**, **photosynthesis**, **glucose**) مع شرح بسيط.
الصيغ الكيمياويّة زي H2O أو CO2 لازم تكتب زي ما هي.
الأرقام في الجُملَ العاديّة بالإنجليزي بالحروف (two, three).
الهَدف: إجابة بالإنجليزي واضحة ومبسّطة، وبعدها عرض مساعدة إضافية بالعربي لو الطفّل حب،
بحيث يبان إن "عَنان" بيشرح جوّه مَنَصّة "شارِع العُلوم".
"""
,
# -------- Saudi English --------
(
StudentNationality
.
SAUDI
,
StudyLanguage
.
ENGLISH
):
"""
إنت مُدرِّس لطفل في ابتدائي اسمه {student_name} في الصف {grade}.
لو الطفل سأل عن هويتك بصراحة (زي "إنت مين؟"، "عرِّفني بنفسك"، "إنت وش تسوي هنا؟")،
رُد بالنصّ الثابت هذا:
"أنا عَنان مؤسس شارع العلوم، وإنت هنا على مَنَصّة Science Street Lab،
وأنا هنا عشان أساعدك تتعلَّم أي حاجة عايز تتعلَّمها في العلوم."
لو سأل أسئلة عامة (زي "كيفك؟"، "شكراً")، رد بطريقة طبيعية ودودة باستخدام اسمه {student_name}.
**عن المنهج وطريقة الشرح:**
- إنت تعرف المنهج السعودي للصف {grade} مضبوط من ملف الـ JSON.
- لو الولد سأل بشكل عام ("وش بندرس؟"، "ورني المنهج"، "إيش المواضيع؟")، عرّض له المنهج بشكل واضح ومرتب.
- لو سأل عن موضوع محدد، استخدم السياق من المنهج واربطه بالوحدات والأفكار اللي لها علاقة.
- دايمًا قول وين مكان الدرس: "هذا في الوحدة الأولى، المفهوم الثاني".
- وضّح الترابط: "هذا مرتبط باللي تعلمناه عن..." أو "هذا يودينا للي راح نتعلمه بعدين عن...".
بالنسبة لأسئلة العلوم أو المنهج:
- Always answer **in English first**.
- After answering, ask: *"اشرحهالك بالعربي اوّ بشكل ابسط؟"*
- If the child says yes (or asks in Arabic), then give a **mixed explanation**
(**English for terminologies + simple Arabic for explanation**).
خل الشرح واضح وسهل وبأمثلة من حياة الطفل اليوميّة.
اذكر اسم {student_name} مرّة وحدة فقط في بداية الرد. لا تكرره في نفس الرد.
ممنوع تستخدم ألقاب زي "يا بطل" أو "يا شاطر". الاسم الأول يكفي.
ولو الرد قصير جداً (جملة أو جملتين)، ممكن ما تذكر الاسم أبداً.
لما تذكر الصف {grade}، قُلها بالطريقة اللي الطفل متعود يسمعها: الصف 4 = رابع ابتدائي، الصف 5 = خامس ابتدائي، وهكذا.
لما تذكر الصف {grade}، قولها بالطريقة اللي الأطفال السعوديين متعودين عليها:
الصف 4 = رابع ابتدائي، الصف 5 = خامس ابتدائي، وهكذا.
الهدف: رد مبسّط، قريب، ويبيّن إن "عَنان" يشرح للطفل جوّه مَنَصّة "شارع العلوم"، مو يقرأ من كتاب.
المصطلحات العلميّة: خليها بالإنجليزي (**roots**, **photosynthesis**, **glucose**) مع شرح مبسّط.
الصيغ الكيمياويّة مثل H2O أو CO2 لازم تكتب مثل ما هي.
الأرقام في النصوص العاديّة بالإنجليزي بالحروف (two, three).
الهدف: إجابة بالإنجليزي مبسّطة، وبعدها عرض مساعدة بالعربي لو الطفل حب،
عشان يبان إن "عَنان" يشرح داخل مَنَصّة "شارع العلوم".
"""
}
}
class
AgentService
:
"""
Service class for handling AI agent conversations using database memory
"""
"""
Enhanced service class for handling AI agent conversations with JSON-based curriculum structure
"""
def
__init__
(
self
,
use_pgvector
:
bool
=
True
,
pool_handler
:
Optional
[
ConnectionPool
]
=
None
):
self
.
openai_service
=
OpenAIService
()
...
...
@@ -114,6 +205,8 @@ class AgentService:
self
.
db_service
=
ChatDatabaseService
(
self
.
pool_handler
)
if
self
.
use_pgvector
:
self
.
pgvector
=
PGVectorService
(
self
.
pool_handler
)
# Setup curriculum table if needed
self
.
pgvector
.
setup_curriculum_table
()
else
:
self
.
pgvector
=
None
...
...
@@ -140,23 +233,172 @@ class AgentService:
except
Exception
as
e
:
logger
.
error
(
f
"Error adding message to history for {student_id}: {e}"
)
def
get_available_subjects
(
self
,
student_id
:
str
)
->
List
[
str
]:
"""Get available subjects for the student based on their grade and language"""
if
not
self
.
pgvector
:
return
[]
def
classify_query_type
(
self
,
query
:
str
,
student_info
:
Dict
)
->
str
:
"""Enhanced query classification using LLM based on JSON structure"""
if
not
self
.
is_available
():
return
"specific_content"
is_arabic
=
student_info
.
get
(
'is_arabic'
,
True
)
grade
=
student_info
.
get
(
'grade'
,
4
)
classification_prompt
=
f
"""
صنف السؤال التالي إلى إحدى الفئات الأربع:
1. "general_chat" - أسئلة دردشة عامة وشخصية عن الطالب أو المدرس
أمثلة: "إنت مين؟", "إزيك؟", "نت عارف انا مين؟", "انت عارف انا في سنة كام؟", "شكرا ليك", "who are you?", "how are you?", "do you know me?", "thank you"
2. "overview" - أسئلة عن نظرة عامة على المنهج أو المحتوى الكامل
أمثلة: "ماذا ندرس؟", "أظهر المنهج", "what do we study?", "show curriculum"
3. "navigation" - أسئلة عن وحدة أو مفهوم معين
أمثلة: "ما في الوحدة الأولى؟", "what's in unit 1?", "أخبرني عن مفهوم الطاقة"
4. "specific_content" - أسئلة محددة عن موضوع علمي معين
أمثلة: "ما هو التمثيل الضوئي؟", "what is photosynthesis?", "كيف تعمل الخلية؟"
السؤال: "{query}"
الطالب يدرس باللغة: {"العربية" if is_arabic else "الإنجليزية"}
الصف: {grade}
رد فقط بكلمة واحدة: general_chat أو overview أو navigation أو specific_content
"""
try
:
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
if
not
student_info
:
return
[]
response
=
self
.
client
.
chat
.
completions
.
create
(
model
=
"gpt-4o-mini"
,
messages
=
[{
"role"
:
"user"
,
"content"
:
classification_prompt
}],
temperature
=
0
,
max_tokens
=
10
)
classification
=
response
.
choices
[
0
]
.
message
.
content
.
strip
()
.
lower
()
return
self
.
pgvector
.
get_subjects_by_grade_and_language
(
if
classification
in
[
"general_chat"
,
"overview"
,
"navigation"
,
"specific_content"
]:
logger
.
info
(
f
"Query classified as: {classification} for query: '{query}'"
)
return
classification
else
:
logger
.
warning
(
f
"Unknown classification: {classification}, defaulting to specific_content"
)
return
"specific_content"
except
Exception
as
e
:
logger
.
warning
(
f
"Error in query classification: {e}, defaulting to specific_content"
)
return
"specific_content"
def
handle_overview_query
(
self
,
student_info
:
Dict
,
subject
:
str
=
"Science"
)
->
str
:
"""Handle curriculum overview queries using JSON-based data"""
if
not
self
.
pgvector
:
if
student_info
[
'study_language'
]
==
StudyLanguage
.
ARABIC
:
return
f
"عذراً، لا يمكنني عرض المنهج حالياً للصف {student_info['grade']}"
else
:
return
f
"Sorry, I cannot show the curriculum for Grade {student_info['grade']} right now"
return
self
.
pgvector
.
get_overview_response
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
]
student_info
[
'is_arabic'
],
subject
)
except
Exception
as
e
:
logger
.
error
(
f
"Error getting available subjects for {student_id}: {e}"
)
return
[]
def handle_general_chat_query(self, query: str, student_info: Dict) -> str:
    """Handle general chat queries using only student information

    Builds a prompt context that contains the student's details and the
    question, with instructions not to use curriculum content.

    Args:
        query: The student's message.
        student_info: Student record; ``student_name``, ``grade``,
            ``nationality`` and ``is_arabic`` are read, each with a default
            when the key is missing.

    Returns:
        The context text, in Arabic when ``is_arabic`` is truthy and in
        English otherwise.
    """
    student_name = student_info.get('student_name', 'الطالب')
    grade = student_info.get('grade', 4)
    nationality = student_info.get('nationality', 'مصري')
    is_arabic = student_info.get('is_arabic', True)

    # Create a simple context with student info only. The study-language
    # line is fixed per branch because the branch is already selected by
    # ``is_arabic``.
    if is_arabic:
        return f"""
معلومات الطالب:
- الاسم: {student_name}
- الصف: {grade}
- الجنسية: {nationality}
- لغة الدراسة: العربية
السؤال: "{query}"
أجب بناء على معلومات الطالب فقط. لا تستخدم أي معلومات من المنهج أو محتوى تعليمي.
إذا سأل الطالب عن هويتك، استخدم الرد المحدد في التعليمات.
إذا سأل عن معلوماته الشخصية، استخدم البيانات المتاحة أعلاه.
كن ودوداً وبسيطاً في الرد.
"""

    return f"""
Student Information:
- Name: {student_name}
- Grade: {grade}
- Nationality: {nationality}
- Study Language: English
Question: "{query}"
Answer based only on the student's information above. Do not use any curriculum or educational content.
If the student asks about your identity, use the specified response in the instructions.
If they ask about their personal information, use the data available above.
Be friendly and simple in your response.
"""
"""Handle unit/concept navigation queries using JSON structure"""
if
not
self
.
pgvector
:
return
self
.
handle_overview_query
(
student_info
,
subject
)
return
self
.
pgvector
.
get_unit_navigation_response
(
query
,
student_info
[
'grade'
],
student_info
[
'is_arabic'
],
subject
)
def generate_enhanced_context(self, search_results: List[Dict], student_info: Dict, query_type: str) -> str:
    """Generate enhanced context with JSON-based curriculum structure awareness

    Args:
        search_results: Retrieved chunks. Each must have ``chunk_text`` and
            may have ``unit``, ``concept``, ``lesson`` and a
            ``curriculum_context`` dict (``navigation_hint``,
            ``related_concepts``).
        student_info: Must contain ``is_arabic``, ``study_language`` and
            ``grade``.
        query_type: Related concepts are listed only for
            ``"specific_content"``.

    Returns:
        The assembled context text, or ``""`` when there are no results.
    """
    if not search_results:
        return ""

    is_arabic = student_info['is_arabic']
    study_language = student_info['study_language']
    grade = student_info['grade']
    english_curriculum = study_language == StudyLanguage.ENGLISH

    # Pieces are collected in a list and joined once at the end instead of
    # growing one string with repeated ``+=``.
    if english_curriculum:
        pieces = [f"📚 من المنهج الإنجليزي لمادة العلوم للصف {grade}:\n\n"]
    else:
        pieces = [f"📚 من المنهج العربي لمادة العلوم للصف {grade}:\n\n"]

    # Label shown for each location field, in display order.
    location_labels = (("الوحدة", 'unit'), ("المفهوم", 'concept'), ("الدرس", 'lesson'))

    for result in search_results:
        # Build header from whichever location fields are present
        location = [f"{label}: {result[key]}" for label, key in location_labels if result.get(key)]
        if location:
            pieces.append(f"**{' → '.join(location)}**\n")

        # Add content
        pieces.append(f"{result['chunk_text']}\n")

        # Add curriculum context if available
        if 'curriculum_context' in result:
            ctx = result['curriculum_context']
            if ctx.get('navigation_hint'):
                pieces.append(f"\n💡 {ctx['navigation_hint']}\n")

            if ctx.get('related_concepts') and query_type == "specific_content":
                related = ', '.join(ctx['related_concepts'][:3])
                if is_arabic:
                    pieces.append(f"🔗 مفاهيم ذات صلة: {related}\n")
                else:
                    pieces.append(f"🔗 Related concepts: {related}\n")

        pieces.append("\n---\n\n")

    # Add instruction for using the context
    if english_curriculum:
        pieces.append("استخدم هذه المعلومات لتقديم شرح دقيق للطفل. المنهج إنجليزي فاذكر المصطلحات الإنجليزية مع الشرح بالعربي.")
    else:
        pieces.append("استخدم هذه المعلومات لتقديم شرح دقيق ومناسب للطفل باستخدام المصطلحات العربية.")

    return "".join(pieces)
def
generate_response
(
self
,
...
...
@@ -167,19 +409,20 @@ class AgentService:
temperature
:
float
=
0.3
,
top_k
:
int
=
3
)
->
str
:
"""
Generate AI response using database memory with optional retrieval based on question type
"""
"""
Enhanced AI response generation with JSON-based curriculum structure awareness
"""
if
not
self
.
is_available
():
raise
HTTPException
(
status_code
=
500
,
detail
=
"Agent service not available"
)
try
:
# Get student info
# Get student info
with explicit language
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
if
not
student_info
:
raise
HTTPException
(
status_code
=
404
,
detail
=
f
"Student with ID {student_id} not found"
)
# Extract
first name
# Extract
information
full_name
=
student_info
.
get
(
'student_name'
,
'الطالب'
)
student_name
=
full_name
.
split
()[
0
]
if
full_name
else
"الطالب"
study_language
=
student_info
[
'study_language'
]
# Map nationality
nationality_lower
=
student_info
[
'nationality'
]
.
lower
()
.
strip
()
...
...
@@ -193,66 +436,56 @@ class AgentService:
self
.
add_message_to_history
(
student_id
,
user_message
,
"user"
)
conversation_history
=
self
.
get_conversation_history
(
student_id
)
# Format system prompt
base_system_prompt
=
SYSTEM_PROMPTS
.
get
(
nationality
,
SYSTEM_PROMPTS
[
StudentNationality
.
EGYPTIAN
])
# Classify query type based on JSON structure
query_type
=
self
.
classify_query_type
(
user_message
,
student_info
)
logger
.
info
(
f
"Query type: {query_type} for student {student_name} ({study_language.value})"
)
# Get appropriate system prompt
prompt_key
=
(
nationality
,
study_language
)
base_system_prompt
=
ENHANCED_SYSTEM_PROMPTS
.
get
(
prompt_key
,
ENHANCED_SYSTEM_PROMPTS
.
get
((
StudentNationality
.
EGYPTIAN
,
StudyLanguage
.
ARABIC
),
""
))
formatted_base_prompt
=
base_system_prompt
.
format
(
student_name
=
student_name
,
grade
=
student_info
[
'grade'
]
)
subject_specific_prompt
=
f
"{formatted_base_prompt}
\n\n
إنت بتدرّس مادة {subject} للطفل {student_name}."
# Add Socratic instructions if any
socratic_instructions
=
self
.
pedagogy_service
.
get_socratic_instructions
(
student_info
[
'grade'
],
student_info
[
'nationality'
]
)
if
socratic_instructions
:
subject_specific
_prompt
+=
f
"
\n\n
{socratic_instructions}"
formatted_base
_prompt
+=
f
"
\n\n
{socratic_instructions}"
# Prepare messages
messages
=
[]
has_system_message
=
conversation_history
and
conversation_history
[
0
]
.
get
(
"role"
)
==
"system"
if
not
has_system_message
:
messages
.
append
({
"role"
:
"system"
,
"content"
:
subject_specific
_prompt
})
self
.
add_message_to_history
(
student_id
,
subject_specific
_prompt
,
"system"
)
messages
.
append
({
"role"
:
"system"
,
"content"
:
formatted_base
_prompt
})
self
.
add_message_to_history
(
student_id
,
formatted_base
_prompt
,
"system"
)
messages
.
extend
(
conversation_history
)
# ----------------- DYNAMIC RETRIEVAL DECISION -----------------
# Ask model to classify if retrieval needed
try
:
classification_prompt
=
f
"""
صنف السؤال التالي: هل يحتاج لإجابة تعتمد على المنهج الدراسي أو محتوى المادة العلمية المتخصصة؟
رد فقط بـ "YES" لو يحتاج retrieval من المحتوى التعليمي، و "NO" لو مجرد سؤال عام أو عن الشخصية أو محادثة عادية.
# Handle different query types with JSON-based curriculum awareness
if
query_type
==
"general_chat"
:
# Handle general chat with student info only
chat_context
=
self
.
handle_general_chat_query
(
user_message
,
student_info
)
messages
.
append
({
"role"
:
"system"
,
"content"
:
f
"سياق المحادثة العامة:
\n
{chat_context}"
})
أمثلة على أسئلة تحتاج "YES
":
- ما هو التمثيل الضوئي؟
- اشرح لي الجهاز الهضمي
- كيف تتكون الأمطار؟
elif
query_type
==
"overview
"
:
# Direct curriculum overview from JSON
overview_response
=
self
.
handle_overview_query
(
student_info
,
subject
)
messages
.
append
({
"role"
:
"system"
,
"content"
:
f
"المنهج الكامل من ملف JSON:
\n
{overview_response}"
})
أمثلة على أسئلة تحتاج "NO":
- إنت مين؟
- إزيك؟
- نت عارف انا مين؟
- شكرا ليك
elif
query_type
==
"navigation"
:
# Unit/concept navigation from JSON structure
navigation_response
=
self
.
handle_navigation_query
(
user_message
,
student_info
,
subject
)
messages
.
append
({
"role"
:
"system"
,
"content"
:
f
"تفاصيل الوحدة/المفهوم من JSON:
\n
{navigation_response}"
})
السؤال: "{user_message}"
"""
classification_response
=
self
.
client
.
chat
.
completions
.
create
(
model
=
"gpt-4o-mini"
,
messages
=
[{
"role"
:
"user"
,
"content"
:
classification_prompt
}],
temperature
=
0
)
classification_answer
=
classification_response
.
choices
[
0
]
.
message
.
content
.
strip
()
.
upper
()
need_retrieval
=
classification_answer
==
"YES"
logger
.
info
(
f
"Classification for '{user_message}': {classification_answer} (need_retrieval: {need_retrieval})"
)
except
Exception
as
e
:
logger
.
warning
(
f
"Error in classification, defaulting to no retrieval: {e}"
)
need_retrieval
=
False
# ----------------- RETRIEVAL (if needed) -----------------
if
self
.
pgvector
and
need_retrieval
:
elif
query_type
==
"specific_content"
and
self
.
pgvector
:
# Enhanced content search with JSON-based curriculum context
try
:
query_embedding
=
self
.
openai_service
.
generate_embedding
(
user_message
)
neighbors
=
self
.
pgvector
.
search_filtered_neares
t
(
search_results
=
self
.
pgvector
.
search_with_curriculum_contex
t
(
query_embedding
=
query_embedding
,
grade
=
student_info
[
'grade'
],
subject
=
subject
,
...
...
@@ -260,25 +493,19 @@ class AgentService:
limit
=
top_k
)
relevant_neighbors
=
[
n
for
n
in
neighbors
if
n
[
'distance'
]
<
1.3
]
if
neighbors
else
[]
if
relevant_neighbors
:
context_message
=
f
"معلومات من المنهج لمادة {subject} للصف {student_info['grade']}:
\n\n
"
for
n
in
relevant_neighbors
:
unit_info
=
f
"الوحدة: {n['unit']}"
if
n
.
get
(
'unit'
)
else
""
concept_info
=
f
"المفهوم: {n['concept']}"
if
n
.
get
(
'concept'
)
else
""
lesson_info
=
f
"الدرس: {n['lesson']}"
if
n
.
get
(
'lesson'
)
else
""
context_header
=
" - "
.
join
(
filter
(
None
,
[
unit_info
,
concept_info
,
lesson_info
]))
if
context_header
:
context_message
+=
f
"**{context_header}**
\n
"
context_message
+=
f
"{n['chunk_text']}
\n\n
---
\n\n
"
context_message
+=
"استخدم هذه المعلومات لتقديم شرح دقيق ومناسب للطفل."
messages
.
append
({
"role"
:
"system"
,
"content"
:
context_message
})
relevant_results
=
[
r
for
r
in
search_results
if
r
[
'distance'
]
<
1.3
]
if
search_results
else
[]
if
relevant_results
:
enhanced_context
=
self
.
generate_enhanced_context
(
relevant_results
,
student_info
,
query_type
)
messages
.
append
({
"role"
:
"system"
,
"content"
:
enhanced_context
})
logger
.
info
(
f
"Added enhanced JSON-based context with {len(relevant_results)} chunks"
)
except
Exception
as
e
:
logger
.
warning
(
f
"Error
using pgvector
: {e}"
)
logger
.
warning
(
f
"Error
in enhanced content search
: {e}"
)
#
----------------- GENERATE RESPONSE -----------------
#
Generate response
response
=
self
.
client
.
chat
.
completions
.
create
(
model
=
model
,
messages
=
messages
,
...
...
@@ -290,6 +517,9 @@ class AgentService:
# Save AI response
self
.
add_message_to_history
(
student_id
,
ai_response
,
"assistant"
)
logger
.
info
(
f
"Generated {query_type} response for {student_name} ({study_language.value}): {len(ai_response)} characters"
)
return
ai_response
except
HTTPException
:
...
...
@@ -300,7 +530,7 @@ class AgentService:
def
search_similar
(
self
,
query_embedding
:
List
[
float
],
student_id
:
str
,
subject
:
str
=
"chemistry"
,
top_k
:
int
=
3
):
"""Search similar content with student-specific filtering"""
"""Search similar content with student-specific filtering
and JSON-based curriculum awareness
"""
if
not
self
.
pgvector
:
raise
HTTPException
(
status_code
=
400
,
detail
=
"PGVector service not enabled"
)
...
...
@@ -309,7 +539,9 @@ class AgentService:
if
not
student_info
:
raise
HTTPException
(
status_code
=
404
,
detail
=
f
"Student with ID {student_id} not found"
)
return
self
.
pgvector
.
search_filtered_nearest
(
logger
.
info
(
f
"Enhanced search for student {student_info['student_name']} who studies in {student_info['study_language'].value}"
)
return
self
.
pgvector
.
search_with_curriculum_context
(
query_embedding
=
query_embedding
,
grade
=
student_info
[
'grade'
],
subject
=
subject
,
...
...
@@ -319,57 +551,63 @@ class AgentService:
except
HTTPException
:
raise
except
Exception
as
e
:
logger
.
error
(
f
"Error in search_similar: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"Search failed: {str(e)}"
)
logger
.
error
(
f
"Error in enhanced search_similar: {e}"
)
raise
HTTPException
(
status_code
=
500
,
detail
=
f
"Enhanced search failed: {str(e)}"
)
def
get_available_subjects
(
self
,
student_id
:
str
)
->
List
[
str
]:
"""Get available subjects for the student based on their grade and language from JSON data"""
if
not
self
.
pgvector
:
return
[]
def
update_student_subject_context
(
self
,
student_id
:
str
,
subject
:
str
):
"""Update the system message for a new subject"""
try
:
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
if
not
student_info
:
return
False
# Extract student name
full_name
=
student_info
.
get
(
'student_name'
,
'الطالب'
)
student_name
=
full_name
.
split
()[
0
]
if
full_name
else
"الطالب"
# Clear existing history to reset context
self
.
db_service
.
clear_history
(
student_id
)
# Set new system message with subject and student name
nationality_lower
=
student_info
[
'nationality'
]
.
lower
()
.
strip
()
nationality_mapping
=
{
'egyptian'
:
StudentNationality
.
EGYPTIAN
,
'saudi'
:
StudentNationality
.
SAUDI
}
return
[]
nationality
=
nationality_mapping
.
get
(
nationality_lower
,
StudentNationality
.
EGYPTIAN
)
base_system_prompt
=
SYSTEM_PROMPTS
.
get
(
nationality
,
SYSTEM_PROMPTS
[
StudentNationality
.
EGYPTIAN
])
# Format the prompt with student name
formatted_base_prompt
=
base_system_prompt
.
format
(
student_name
=
student_name
,
grade
=
student_info
[
'grade'
]
return
self
.
pgvector
.
get_subjects_by_grade_and_language
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
]
)
except
Exception
as
e
:
logger
.
error
(
f
"Error getting available subjects for {student_id}: {e}"
)
return
[]
subject_specific_prompt
=
f
"{formatted_base_prompt}
\n\n
إنت بتدرّس مادة {subject} للطفل {student_name}."
def
get_curriculum_overview
(
self
,
student_id
:
str
,
subject
:
str
=
"Science"
)
->
Dict
:
"""Get curriculum overview for a specific student based on JSON data"""
if
not
self
.
pgvector
:
return
{
"error"
:
"PGVector service not available"
}
# Add Socratic questioning instructions if applicable
socratic_instructions
=
self
.
pedagogy_service
.
get_socratic_instructions
(
try
:
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
if
not
student_info
:
return
{
"error"
:
"Student not found"
}
curriculum
=
self
.
pgvector
.
get_curriculum_structure
(
student_info
[
'grade'
],
student_info
[
'nationality'
]
student_info
[
'is_arabic'
],
subject
)
if
socratic_instructions
:
subject_specific_prompt
+=
f
"
\n\n
{socratic_instructions}"
logger
.
info
(
f
"Added Socratic instructions when updating subject context for grade {student_info['grade']}"
)
self
.
add_message_to_history
(
student_id
,
subject_specific_prompt
,
"system"
)
if
not
curriculum
:
return
{
"error"
:
f
"No curriculum found for Grade {student_info['grade']} in {student_info['study_language'].value}"
}
logger
.
info
(
f
"Updated subject context to {subject} for student {student_id} ({student_name})"
)
return
True
return
{
"student_info"
:
{
"name"
:
student_info
[
'student_name'
],
"grade"
:
student_info
[
'grade'
],
"study_language"
:
student_info
[
'study_language'
]
.
value
,
"nationality"
:
student_info
[
'nationality'
]
},
"curriculum"
:
curriculum
,
"available_units"
:
self
.
pgvector
.
get_units_for_grade
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
],
subject
),
"source"
:
"JSON-based curriculum structure"
}
except
Exception
as
e
:
logger
.
error
(
f
"Error
updating subject context
: {e}"
)
return
False
logger
.
error
(
f
"Error
getting curriculum overview
: {e}"
)
return
{
"error"
:
str
(
e
)}
def
export_conversation
(
self
,
student_id
:
str
)
->
List
[
Dict
[
str
,
str
]]:
"""Export conversation history for a student"""
...
...
@@ -379,22 +617,6 @@ class AgentService:
logger
.
error
(
f
"Error exporting conversation for {student_id}: {e}"
)
return
[]
def import_conversation(self, messages: List[Dict[str, str]], student_id: str):
    """Replace a student's stored conversation with the given messages.

    The existing history is cleared first, then each message with non-empty
    ``content`` is stored in order. A missing ``role`` is stored as
    ``"user"``. Any failure is logged and re-raised.
    """
    try:
        # Start from a clean slate so the import is not appended to old turns.
        self.db_service.clear_history(student_id)

        for entry in messages:
            text = entry.get("content", "")
            if not text:
                continue
            self.add_message_to_history(student_id, text, entry.get("role", "user"))
    except Exception as e:
        logger.error(f"Error importing conversation for {student_id}: {e}")
        raise
def
clear_conversation
(
self
,
student_id
:
str
)
->
Dict
[
str
,
str
]:
"""Clear conversation history for a student"""
try
:
...
...
@@ -411,97 +633,116 @@ class AgentService:
}
def
get_agent_stats
(
self
,
student_id
:
str
)
->
Dict
:
"""Get conversation statistics for a student"""
"""Get conversation statistics for a student
with enhanced JSON-based curriculum info
"""
try
:
history
=
self
.
get_conversation_history
(
student_id
)
user_messages
=
[
msg
for
msg
in
history
if
msg
[
'role'
]
==
'user'
]
assistant_messages
=
[
msg
for
msg
in
history
if
msg
[
'role'
]
==
'assistant'
]
system_messages
=
[
msg
for
msg
in
history
if
msg
[
'role'
]
==
'system'
]
# Get student language info
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
language_info
=
{}
curriculum_info
=
{}
if
student_info
:
language_info
=
{
"study_language"
:
student_info
[
'study_language'
]
.
value
,
"nationality"
:
student_info
[
'nationality'
],
"grade"
:
student_info
[
'grade'
]
}
# Add JSON-based curriculum availability info
if
self
.
pgvector
:
curriculum
=
self
.
pgvector
.
get_curriculum_structure
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
]
)
curriculum_info
=
{
"curriculum_available"
:
curriculum
is
not
None
,
"curriculum_source"
:
"JSON file"
if
curriculum
else
"None"
,
"available_subjects"
:
self
.
pgvector
.
get_subjects_by_grade_and_language
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
]
),
"available_units"
:
len
(
curriculum
.
get
(
'units'
,
[]))
if
curriculum
else
0
}
return
{
"student_id"
:
student_id
,
"total_messages"
:
len
(
history
),
"user_messages"
:
len
(
user_messages
),
"assistant_messages"
:
len
(
assistant_messages
),
"system_messages"
:
len
(
system_messages
),
"conversation_active"
:
len
(
history
)
>
0
"conversation_active"
:
len
(
history
)
>
0
,
**
language_info
,
**
curriculum_info
}
except
Exception
as
e
:
logger
.
error
(
f
"Error getting agent stats: {e}"
)
logger
.
error
(
f
"Error getting
enhanced
agent stats: {e}"
)
return
{
"student_id"
:
student_id
,
"error"
:
str
(
e
)
}
def set_system_prompt(self, prompt: str) -> Dict[str, str]:
    """Placeholder for updating the system prompt.

    ``prompt`` is currently ignored: nothing is stored, and a fixed success
    payload is returned. How prompts should be managed (globally or per
    student) is still to be decided.
    """
    acknowledgement = {
        "status": "success",
        "message": "System prompt updated",
    }
    return acknowledgement
@property
def system_prompt(self) -> str:
    """Return the current system prompt (a fixed default string for now)."""
    default_prompt = "Default system prompt for educational AI assistant"
    return default_prompt
def
debug_retrieval_pipeline
(
self
,
student_id
:
str
,
query
:
str
):
"""Debug function to trace the retrieval pipeline"""
print
(
"=== RETRIEVAL DEBUG PIPELINE ==="
)
def
get_curriculum_structure_info
(
self
,
student_id
:
str
,
subject
:
str
=
"Science"
)
->
Dict
:
"""Get detailed curriculum structure information from JSON data"""
if
not
self
.
pgvector
:
return
{
"error"
:
"PGVector service not available"
}
try
:
# 1. Check student info
student_info
=
self
.
db_service
.
get_student_info
(
student_id
)
print
(
f
"1. Student Info: {student_info}"
)
if
not
student_info
:
print
(
"❌ No student info found!"
)
return
# 2. Test embedding generation
print
(
f
"2. Testing embedding generation for query: '{query}'"
)
query_embedding
=
self
.
openai_service
.
generate_embedding
(
query
)
print
(
f
"✅ Generated embedding (length: {len(query_embedding)})"
)
return
{
"error"
:
"Student not found"
}
# 3. Test vector search
print
(
f
"3. Testing vector search..."
)
if
self
.
pgvector
:
neighbors
=
self
.
pgvector
.
search_filtered_nearest
(
query_embedding
=
query_embedding
,
grade
=
student_info
[
'grade'
],
subject
=
"Science"
,
is_arabic
=
student_info
[
'is_arabic'
],
limit
=
3
curriculum
=
self
.
pgvector
.
get_curriculum_structure
(
student_info
[
'grade'
],
student_info
[
'is_arabic'
],
subject
)
print
(
f
"✅ Found {len(neighbors)} neighbors:"
)
for
i
,
neighbor
in
enumerate
(
neighbors
):
print
(
f
" {i+1}. Distance: {neighbor['distance']:.3f}"
)
print
(
f
" Unit: {neighbor.get('unit', 'N/A')}"
)
print
(
f
" Concept: {neighbor.get('concept', 'N/A')}"
)
print
(
f
" Text preview: {neighbor['chunk_text'][:100]}..."
)
print
()
else
:
print
(
"❌ PGVector not available"
)
# 4. Test full response generation
print
(
f
"4. Testing full response generation..."
)
response
=
self
.
generate_response
(
user_message
=
query
,
student_id
=
student_id
,
subject
=
"Science"
)
print
(
f
"✅ Generated response (first 200 chars): {response[:200]}..."
)
if
not
curriculum
:
return
{
"error"
:
f
"No curriculum structure found"
,
"grade"
:
student_info
[
'grade'
],
"language"
:
"Arabic"
if
student_info
[
'is_arabic'
]
else
"English"
,
"subject"
:
subject
}
# Extract detailed structure info
units_info
=
[]
if
'units'
in
curriculum
:
for
unit
in
curriculum
[
'units'
]:
unit_info
=
{
"number"
:
unit
.
get
(
'number'
),
"name"
:
unit
.
get
(
'name'
),
"description"
:
unit
.
get
(
'description'
,
''
),
"concepts_count"
:
len
(
unit
.
get
(
'concepts'
,
[])),
"concepts"
:
[]
}
print
(
"=== DEBUG COMPLETE ==="
)
for
concept
in
unit
.
get
(
'concepts'
,
[]):
concept_info
=
{
"number"
:
concept
.
get
(
'number'
),
"name"
:
concept
.
get
(
'name'
),
"description"
:
concept
.
get
(
'description'
,
''
),
"lessons_count"
:
len
(
concept
.
get
(
'lessons'
,
[]))
}
unit_info
[
"concepts"
]
.
append
(
concept_info
)
units_info
.
append
(
unit_info
)
return
{
"student_info"
:
{
"grade"
:
student_info
[
'grade'
],
"language"
:
"Arabic"
if
student_info
[
'is_arabic'
]
else
"English"
,
"nationality"
:
student_info
[
'nationality'
]
},
"curriculum_title"
:
curriculum
.
get
(
'title'
,
''
),
"total_units"
:
len
(
units_info
),
"units"
:
units_info
,
"source"
:
"JSON-based curriculum file"
}
except
Exception
as
e
:
print
(
f
"❌ Debug pipeline failed at step: {e}"
)
import
traceback
traceback
.
print_exc
()
logger
.
error
(
f
"Error getting curriculum structure info: {e}"
)
return
{
"error"
:
str
(
e
)}
def
close
(
self
):
"""Close database connection pools"""
...
...
self_hosted_env/voice_agent/services/chat_database_service.py
View file @
cea5383f
...
...
@@ -4,11 +4,15 @@ from psycopg2.extras import RealDictCursor
from
typing
import
List
,
Dict
,
Optional
,
Tuple
import
logging
from
services.connection_pool
import
ConnectionPool
from
enum
import
Enum
import
sys
import
os
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'..'
)))
from
core
import
StudyLanguage
logger
=
logging
.
getLogger
(
__name__
)
class
ChatDatabaseService
:
"""Service for managing chat history using a shared, robust connection pool"""
...
...
@@ -27,7 +31,7 @@ class ChatDatabaseService:
return
result
[
"nationality"
]
if
result
else
None
def
get_student_info
(
self
,
student_id
:
str
)
->
Optional
[
Dict
]:
"""Get complete student information
from database
"""
"""Get complete student information
with explicit language awareness
"""
with
self
.
pool_handler
.
get_connection
()
as
conn
:
with
conn
.
cursor
(
cursor_factory
=
RealDictCursor
)
as
cur
:
cur
.
execute
(
...
...
@@ -40,17 +44,20 @@ class ChatDatabaseService:
)
result
=
cur
.
fetchone
()
if
result
:
# Convert boolean to explicit language enum
study_language
=
StudyLanguage
.
ARABIC
if
result
[
'language'
]
else
StudyLanguage
.
ENGLISH
return
{
'student_id'
:
result
[
'student_id'
],
'student_name'
:
result
[
'student_name'
],
'grade'
:
result
[
'grade'
],
'is_arabic'
:
result
[
'language'
],
'study_language'
:
study_language
,
# Explicit language enum
'is_arabic'
:
result
[
'language'
],
# Keep for backward compatibility
'nationality'
:
result
[
'nationality'
]
}
return
None
def
get_student_grade_and_language
(
self
,
student_id
:
str
)
->
Optional
[
Tuple
[
int
,
bool
]]:
"""Get student grade
and language preferenc
e"""
def
get_student_grade_and_language
(
self
,
student_id
:
str
)
->
Optional
[
Tuple
[
int
,
bool
,
StudyLanguage
]]:
"""Get student grade
, language preference, and explicit study languag
e"""
with
self
.
pool_handler
.
get_connection
()
as
conn
:
with
conn
.
cursor
(
cursor_factory
=
RealDictCursor
)
as
cur
:
cur
.
execute
(
...
...
@@ -59,7 +66,8 @@ class ChatDatabaseService:
)
result
=
cur
.
fetchone
()
if
result
:
return
(
result
[
"grade"
],
result
[
"language"
])
study_language
=
StudyLanguage
.
ARABIC
if
result
[
'language'
]
else
StudyLanguage
.
ENGLISH
return
(
result
[
"grade"
],
result
[
"language"
],
study_language
)
return
None
def
get_chat_history
(
self
,
student_id
:
str
,
limit
:
int
=
20
)
->
List
[
Dict
[
str
,
str
]]:
...
...
@@ -154,6 +162,11 @@ class ChatDatabaseService:
)
conn
.
commit
()
# Log the language update explicitly
if
language
is
not
None
:
study_language
=
StudyLanguage
.
ARABIC
if
language
else
StudyLanguage
.
ENGLISH
logger
.
info
(
f
"Updated student {student_id} language to {study_language.value}"
)
def
create_student
(
self
,
student_id
:
str
,
student_name
:
str
,
grade
:
int
,
language
:
bool
,
nationality
:
str
=
'EGYPTIAN'
):
"""Create a new student record"""
...
...
@@ -168,3 +181,52 @@ class ChatDatabaseService:
(
student_id
,
student_name
,
grade
,
language
,
nationality
)
)
conn
.
commit
()
# Log the explicit language information
study_language
=
StudyLanguage
.
ARABIC
if
language
else
StudyLanguage
.
ENGLISH
logger
.
info
(
f
"Created student {student_id} ({student_name}) - Grade: {grade}, Language: {study_language.value}, Nationality: {nationality}"
)
def get_student_language_summary(self, student_id: str) -> Optional[Dict[str, str]]:
    """Return a human-readable summary of a student's language settings.

    Args:
        student_id: Identifier passed to ``get_student_info``.

    Returns:
        ``None`` when the student is not found; otherwise a dict with
        ``student_id``, ``student_name``, ``study_language`` (the enum's
        value), ``nationality`` (as stored), ``grade`` (as a string) and an
        Arabic ``description`` sentence.
    """
    student_info = self.get_student_info(student_id)
    if not student_info:
        return None

    # Normalise the way the agent service does (lower + strip) and tolerate a
    # NULL nationality. Anything that is not explicitly Saudi is described as
    # Egyptian, matching the Egyptian default used elsewhere in the project.
    nationality = (student_info['nationality'] or '').strip().lower()
    nationality_desc = "سعودي" if nationality == "saudi" else "مصري"

    if student_info['study_language'] == StudyLanguage.ARABIC:
        language_desc = "بالعربي"
    else:
        language_desc = "بالإنجليزي"

    return {
        "student_id": student_id,
        "student_name": student_info['student_name'],
        "study_language": student_info['study_language'].value,
        "nationality": student_info['nationality'],
        "grade": str(student_info['grade']),
        "description": f"طالب {nationality_desc} في الصف {student_info['grade']} يدرس {language_desc}",
    }
def get_students_by_language(self, study_language: StudyLanguage) -> List[Dict]:
    """List every student whose study language matches ``study_language``.

    The ``students.language`` column is a boolean where true means Arabic,
    so the enum is converted to that flag for the query. Rows come back
    ordered by grade and then student name, each tagged with the enum's
    value under ``study_language``.
    """
    studies_in_arabic = study_language == StudyLanguage.ARABIC
    language_label = study_language.value

    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT student_id, student_name, grade, nationality
                FROM students
                WHERE language = %s
                ORDER BY grade, student_name
                """,
                (studies_in_arabic,)
            )
            students = []
            for row in cur.fetchall():
                students.append({
                    'student_id': row['student_id'],
                    'student_name': row['student_name'],
                    'grade': row['grade'],
                    'nationality': row['nationality'],
                    'study_language': language_label,
                })
            return students
\ No newline at end of file
self_hosted_env/voice_agent/services/pgvector_service.py
View file @
cea5383f
import
os
import
psycopg2
from
psycopg2.extras
import
RealDictCursor
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
,
Dict
,
Any
import
logging
import
json
from
pgvector.psycopg2
import
register_vector
from
services.connection_pool
import
ConnectionPool
...
...
@@ -10,7 +11,7 @@ logger = logging.getLogger(__name__)
class
PGVectorService
:
"""
Service for managing embeddings with PostgreSQL pgvector using a shared, robust connection pool
"""
"""
Enhanced service for managing embeddings with PostgreSQL pgvector and curriculum structure awareness
"""
def
__init__
(
self
,
pool_handler
:
'ConnectionPool'
):
self
.
pool_handler
=
pool_handler
...
...
@@ -73,6 +74,44 @@ class PGVectorService:
)
return
cur
.
fetchall
()
def search_with_curriculum_context(self, query_embedding: list, grade: int, subject: str, is_arabic: bool, limit: int = 3):
    """Nearest-neighbour chunk search, with curriculum context added to each hit.

    Args:
        query_embedding: Query vector compared against ``embedding`` with the
            pgvector ``<->`` operator.
        grade: Exact grade filter.
        subject: Matched case-insensitively as a substring (``ILIKE %subject%``).
        is_arabic: Language filter on the chunk rows.
        limit: Maximum number of rows returned.

    Returns:
        The fetched rows, nearest first, each with an added
        ``curriculum_context`` entry built by ``_build_curriculum_context``.
    """
    # Load the curriculum BEFORE borrowing a connection: get_curriculum_structure
    # takes its own connection from the pool, so calling it inside the block
    # below would hold two pooled connections for every search.
    curriculum = self.get_curriculum_structure(grade, is_arabic, subject)

    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT id, grade, subject, unit, concept, lesson, chunk_text,
                       is_arabic, embedding <-> %s::vector AS distance
                FROM educational_chunks
                WHERE grade = %s
                  AND subject ILIKE %s
                  AND is_arabic = %s
                ORDER BY embedding <-> %s::vector
                LIMIT %s;
                """,
                (query_embedding, grade, f"%{subject}%", is_arabic, query_embedding, limit),
            )
            results = cur.fetchall()

    # The rows are already fetched, so the enrichment needs no connection.
    for result in results:
        result['curriculum_context'] = self._build_curriculum_context(
            result, curriculum, grade, is_arabic
        )
    return results
def
search_flexible_filtered_nearest
(
self
,
query_embedding
:
list
,
...
...
@@ -120,10 +159,73 @@ class PGVectorService:
)
return
cur
.
fetchall
()
def get_curriculum_structure(self, grade: int, is_arabic: bool, subject: str = "Science") -> Optional[Dict]:
    """Fetch the stored curriculum structure for a grade, language and subject.

    Looks up the ``curriculum_structure`` row matching all three filters
    exactly and returns its ``curriculum_data`` value, or ``None`` when no
    row matches.
    """
    lookup_sql = """
        SELECT curriculum_data, created_at
        FROM curriculum_structure
        WHERE grade = %s AND is_arabic = %s AND subject = %s;
    """
    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(lookup_sql, (grade, is_arabic, subject))
            row = cur.fetchone()
            if not row:
                return None
            return row['curriculum_data']
def get_units_for_grade(self, grade: int, is_arabic: bool, subject: str = "Science") -> List[str]:
    """Return the unit names of the curriculum for a grade, language and subject.

    Units with a missing or empty ``name`` are skipped. Returns ``[]`` when
    no curriculum is stored or it has no ``units`` entry.
    """
    curriculum = self.get_curriculum_structure(grade, is_arabic, subject)
    if not curriculum or 'units' not in curriculum:
        return []

    # ``.get`` yields None for a nameless unit; the filter drops None and "".
    return [unit.get('name') for unit in curriculum['units'] if unit.get('name')]
def get_concepts_for_unit(self, grade: int, unit_name: str, is_arabic: bool, subject: str = "Science") -> List[str]:
    """Return the concept names listed under the unit called ``unit_name``.

    Unit names are compared for exact equality. Concepts with a missing or
    empty ``name`` are skipped. Returns ``[]`` when no curriculum is stored,
    it has no ``units`` entry, or no unit matches.
    """
    curriculum = self.get_curriculum_structure(grade, is_arabic, subject)
    if not curriculum or 'units' not in curriculum:
        return []

    # Every unit with a matching name contributes, in curriculum order.
    return [
        concept.get('name')
        for unit in curriculum['units']
        if unit.get('name') == unit_name and 'concepts' in unit
        for concept in unit['concepts']
        if concept.get('name')
    ]
def
get_subjects_by_grade_and_language
(
self
,
grade
:
int
,
is_arabic
:
bool
)
->
List
[
str
]:
"""Get available subjects for a specific grade and language"""
with
self
.
pool_handler
.
get_connection
()
as
conn
:
with
conn
.
cursor
(
cursor_factory
=
RealDictCursor
)
as
cur
:
# First check curriculum_structure table
cur
.
execute
(
"""
SELECT DISTINCT subject
FROM curriculum_structure
WHERE grade =
%
s AND is_arabic =
%
s
ORDER BY subject;
"""
,
(
grade
,
is_arabic
)
)
curriculum_subjects
=
[
row
[
'subject'
]
for
row
in
cur
.
fetchall
()]
if
curriculum_subjects
:
return
curriculum_subjects
# Fallback to educational_chunks table
cur
.
execute
(
"""
SELECT DISTINCT subject
...
...
@@ -134,3 +236,229 @@ class PGVectorService:
(
grade
,
is_arabic
)
)
return
[
row
[
'subject'
]
for
row
in
cur
.
fetchall
()]
def insert_curriculum_structure(self, grade: int, is_arabic: bool, subject: str, curriculum_data: Dict):
    """Insert a curriculum row, or overwrite the existing one.

    Rows are keyed by ``(grade, is_arabic, subject)``. On a key conflict the
    stored ``curriculum_data`` is replaced and ``created_at`` is reset to the
    current timestamp. ``curriculum_data`` is serialised with ``json.dumps``
    before being sent to the database. The transaction is committed before
    returning.
    """
    upsert_sql = """
        INSERT INTO curriculum_structure (grade, is_arabic, subject, curriculum_data)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (grade, is_arabic, subject)
        DO UPDATE SET curriculum_data = EXCLUDED.curriculum_data, created_at = CURRENT_TIMESTAMP;
    """
    with self.pool_handler.get_connection() as conn:
        with conn.cursor() as cur:
            row_values = (grade, is_arabic, subject, json.dumps(curriculum_data))
            cur.execute(upsert_sql, row_values)
            conn.commit()
def _build_curriculum_context(self, chunk_result: Dict, curriculum: Optional[Dict], grade: int, is_arabic: bool) -> Dict:
    """Describe where a retrieved chunk sits inside the curriculum.

    Args:
        chunk_result: Retrieved chunk; its ``unit`` and ``concept`` keys are
            read (both default to an empty string).
        curriculum: Curriculum structure with a ``units`` list, or a falsy
            value when none is available.
        grade: Grade number, used only in the ``position`` string.
        is_arabic: Selects the Arabic or English wording of the hint.

    Returns:
        A dict with four keys:
        ``position`` – ``"Grade N → unit → concept"`` (``"Unknown"`` when
        there is no curriculum);
        ``related_concepts`` – names of the other concepts in the first unit
        whose name equals the chunk's unit;
        ``unit_overview`` – that unit's ``description`` (empty if absent);
        ``navigation_hint`` – one sentence naming the unit and up to three
        related concepts, empty when the unit was not found.
    """
    if not curriculum:
        return {
            "position": "Unknown",
            "related_concepts": [],
            "unit_overview": "",
            "navigation_hint": "",
        }

    chunk_unit = chunk_result.get('unit', '')
    chunk_concept = chunk_result.get('concept', '')

    # Only the first unit with a matching name is considered. A missing or
    # null "units" entry is treated as an empty curriculum.
    current_unit = next(
        (unit for unit in (curriculum.get('units') or []) if unit.get('name') == chunk_unit),
        None,
    )

    # Sibling concepts: every named concept of that unit except the chunk's own.
    related_concepts = []
    if current_unit is not None:
        for concept in current_unit.get('concepts') or []:
            concept_name = concept.get('name', '')
            if concept_name and concept_name != chunk_concept:
                related_concepts.append(concept_name)

    navigation_hint = ""
    if current_unit:
        unit_name = current_unit.get('name', '')
        # The hint lists at most three siblings to stay short.
        preview = ', '.join(related_concepts[:3])
        if is_arabic:
            navigation_hint = f"هذا جزء من {unit_name}"
            if related_concepts:
                navigation_hint += f"، والذي يتضمن أيضاً: {preview}"
        else:
            navigation_hint = f"This is part of {unit_name}"
            if related_concepts:
                navigation_hint += f", which also covers: {preview}"

    return {
        "position": f"Grade {grade} → {chunk_unit} → {chunk_concept}",
        "related_concepts": related_concepts,
        "unit_overview": current_unit.get('description', '') if current_unit else "",
        "navigation_hint": navigation_hint,
    }
def classify_query_type(self, query: str, grade: int, is_arabic: bool) -> str:
    """Pick a retrieval strategy for a query by substring matching.

    Returns ``"overview"`` when the query contains a curriculum-overview
    phrase, ``"navigation"`` when it contains a unit-navigation phrase, and
    ``"specific_content"`` otherwise. Overview phrases are checked first.
    Only the phrase set of the selected language is consulted: Arabic
    phrases are matched against the query as given, English phrases against
    its lower-cased form. ``grade`` is accepted but not used.
    """
    lowered = query.lower()

    if is_arabic:
        haystack = query
        rules = (
            ("overview", (
                "ماذا ندرس", "أظهر المنهج", "ما هي المواضيع", "ما المنهج",
                "ما نتعلم", "المحتويات", "الوحدات", "الفصول",
            )),
            ("navigation", (
                "ما في الوحدة", "أخبرني عن الوحدة", "محتوى الوحدة", "مفاهيم الوحدة",
            )),
        )
    else:
        haystack = lowered
        rules = (
            ("overview", (
                "what do we study", "show curriculum", "what topics", "what subjects",
                "curriculum overview", "table of contents", "units", "chapters",
            )),
            ("navigation", (
                "what's in unit", "tell me about unit", "unit content", "concepts in unit",
            )),
        )

    # Rules are ordered: the first category with a matching phrase wins.
    for label, phrases in rules:
        for phrase in phrases:
            if phrase in haystack:
                return label
    return "specific_content"
def get_overview_response(self, grade: int, is_arabic: bool, subject: str = "Science") -> str:
    """Render the whole curriculum as a unit/concept outline.

    Args:
        grade: Grade whose curriculum is listed.
        is_arabic: Selects Arabic or English wording.
        subject: Subject to look up; it also appears in the title line. In
            Arabic, "Science" is shown as "العلوم" and any other subject is
            shown as given.

    Returns:
        A title line followed by one section per unit
        (``**Unit N: name**``) with one ``├──`` line per named concept. A
        concept without a ``number`` is listed by name alone. When no
        curriculum is stored, a short apology message is returned instead.
    """
    curriculum = self.get_curriculum_structure(grade, is_arabic, subject)
    if not curriculum:
        if is_arabic:
            return f"عذراً، لا يوجد منهج متاح للصف {grade}"
        return f"Sorry, no curriculum available for Grade {grade}"

    # Both languages share one layout; only the words differ.
    if is_arabic:
        subject_label = "العلوم" if str(subject).strip().lower() == "science" else subject
        title = f"📚 منهج {subject_label} للصف {grade}"
        unit_word, concept_word = "الوحدة", "المفهوم"
    else:
        title = f"📚 {subject} Curriculum for Grade {grade}"
        unit_word, concept_word = "Unit", "Concept"

    parts = [f"{title}\n\n"]
    for index, unit in enumerate(curriculum.get('units') or [], 1):
        unit_name = unit.get('name', f'{unit_word} {index}')
        parts.append(f"**{unit_word} {index}: {unit_name}**\n")
        for concept in unit.get('concepts') or []:
            concept_name = concept.get('name', '')
            if not concept_name:
                continue
            concept_number = concept.get('number', '')
            if concept_number in (None, ''):
                # Without a number, avoid the dangling "Concept : name".
                parts.append(f"├── {concept_name}\n")
            else:
                parts.append(f"├── {concept_word} {concept_number}: {concept_name}\n")
        parts.append("\n")  # blank line between units
    return "".join(parts)
def get_unit_navigation_response(self, query: str, grade: int, is_arabic: bool, subject: str = "Science") -> str:
    """Answer a "what is in unit N" question with that unit's concept list.

    Args:
        query: User question; the unit is identified by a number in it.
        grade: Grade whose curriculum is consulted.
        is_arabic: Selects Arabic or English wording.
        subject: Subject to look up; it also appears in the header line. In
            Arabic, "Science" is shown as "العلوم" and any other subject is
            shown as given.

    Returns:
        A header, the unit title and its named concepts. The result of
        ``get_overview_response`` is returned instead when no curriculum is
        stored, the query holds no number, or the number does not refer to
        an existing unit (units are numbered from 1).
    """
    import re

    curriculum = self.get_curriculum_structure(grade, is_arabic, subject)
    if not curriculum:
        return self.get_overview_response(grade, is_arabic, subject)

    units = curriculum.get('units') or []

    # Prefer the number right after the word "unit"/"الوحدة" so that a
    # query such as "grade 4 unit 2" selects unit 2, not unit 4. Fall back
    # to the first number anywhere in the query.
    match = (
        re.search(r'(?:unit|الوحدة)\s*(\d+)', query, re.IGNORECASE)
        or re.search(r'(\d+)', query)
    )
    if match is None:
        return self.get_overview_response(grade, is_arabic, subject)

    unit_index = int(match.group(1)) - 1  # convert to 0-based index
    if not 0 <= unit_index < len(units):
        return self.get_overview_response(grade, is_arabic, subject)

    if is_arabic:
        subject_label = "العلوم" if str(subject).strip().lower() == "science" else subject
        header = f"📖 من منهج {subject_label} للصف {grade}:\n\n"
        unit_word, concepts_heading = "الوحدة", "المفاهيم:\n"
    else:
        header = f"📖 From Grade {grade} {subject} Curriculum:\n\n"
        unit_word, concepts_heading = "Unit", "Concepts:\n"

    unit = units[unit_index]
    unit_number = unit_index + 1
    unit_name = unit.get('name', f'{unit_word} {unit_number}')

    parts = [header, f"**{unit_word} {unit_number}: {unit_name}**\n\n"]
    if 'concepts' in unit:
        parts.append(concepts_heading)
        for concept in unit['concepts'] or []:
            concept_name = concept.get('name', '')
            if not concept_name:
                continue
            concept_number = concept.get('number', '')
            if concept_number in (None, ''):
                # Without a number, avoid a line starting with ": ".
                parts.append(f"{concept_name}\n")
            else:
                parts.append(f"{concept_number}: {concept_name}\n")
    return "".join(parts)
def setup_curriculum_table(self):
    """Create the ``curriculum_structure`` table and its indexes if absent.

    Runs three idempotent DDL statements (``IF NOT EXISTS``) on a pooled
    connection: the table itself, an index on ``(grade, is_arabic)`` and an
    index on ``subject``. It then commits and logs completion.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS curriculum_structure (
        id SERIAL PRIMARY KEY,
        grade INTEGER NOT NULL,
        is_arabic BOOLEAN NOT NULL,
        subject VARCHAR(100) NOT NULL DEFAULT 'Science',
        curriculum_data JSONB NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(grade, is_arabic, subject)
        );
        """,
        # Indexes for better lookup performance.
        "CREATE INDEX IF NOT EXISTS idx_curriculum_grade_lang ON curriculum_structure(grade, is_arabic);",
        "CREATE INDEX IF NOT EXISTS idx_curriculum_subject ON curriculum_structure(subject);",
    )
    with self.pool_handler.get_connection() as conn:
        with conn.cursor() as cur:
            for statement in ddl_statements:
                cur.execute(statement)
            conn.commit()
            logger.info("Curriculum structure table setup complete")
def get_all_available_curricula(self) -> List[Dict]:
    """List every stored curriculum, ordered by grade, language and subject.

    Each row carries ``grade``, ``is_arabic``, ``subject``, ``title`` (the
    ``title`` field of the stored JSON, extracted as text) and
    ``created_at``. Rows come from a ``RealDictCursor``.
    """
    listing_sql = """
        SELECT grade, is_arabic, subject,
        curriculum_data->>'title' as title,
        created_at
        FROM curriculum_structure
        ORDER BY grade, is_arabic, subject;
    """
    with self.pool_handler.get_connection() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(listing_sql)
            rows = cur.fetchall()
            return rows
\ No newline at end of file
self_hosted_env/voice_agent/start.sh
View file @
cea5383f
...
...
@@ -8,6 +8,8 @@ echo "Setting up schema and inserting data..."
python apply_test_schema.py
python insert_csv_embeddings.py
echo
"Database setup complete."
python curriculum_structure.py
echo
"Curriculum structure setup complete."
sleep
5
# Start the web server and keep it as the main process
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment