handle vector database

parent 90af444c
...@@ -8,7 +8,7 @@ RUN pip install --no-cache-dir -r requirements.txt ...@@ -8,7 +8,7 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . . COPY . .
#just keep the container running without doing anything #just keep the container running without doing anything
#CMD ["sh", "-c", "while :; do sleep 10; done"] CMD ["sh", "-c", "while :; do sleep 10; done"]
#run the app automatically when the container starts #run the app automatically when the container starts
CMD ["python", "main.py"] #CMD ["python", "main.py"]
...@@ -6,7 +6,9 @@ schema_sql = """ ...@@ -6,7 +6,9 @@ schema_sql = """
CREATE TABLE IF NOT EXISTS students ( CREATE TABLE IF NOT EXISTS students (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
student_id VARCHAR(50) UNIQUE NOT NULL, student_id VARCHAR(50) UNIQUE NOT NULL,
grade INTEGER, student_name VARCHAR(100),
grade VARCHAR(20),
language BOOLEAN,
nationality VARCHAR(20) NOT NULL DEFAULT 'EGYPTIAN' nationality VARCHAR(20) NOT NULL DEFAULT 'EGYPTIAN'
); );
...@@ -26,11 +28,11 @@ CREATE INDEX IF NOT EXISTS idx_chat_history_created_at ON chat_history(created_a ...@@ -26,11 +28,11 @@ CREATE INDEX IF NOT EXISTS idx_chat_history_created_at ON chat_history(created_a
CREATE INDEX IF NOT EXISTS idx_students_nationality ON students(nationality); CREATE INDEX IF NOT EXISTS idx_students_nationality ON students(nationality);
-- Insert dummy data for testing -- Insert dummy data for testing
INSERT INTO students (student_id, grade, nationality) VALUES INSERT INTO students (student_id, student_name, grade, language, nationality) VALUES
('student_001', 3, 'EGYPTIAN'), ('student_001', 'Ahmed Ali', 'prime4', TRUE, 'EGYPTIAN'),
('student_002', 4, 'SAUDI'), ('student_002', 'Sara Hassan', 'prime6', FALSE, 'SAUDI'),
('student_003', 2, 'EGYPTIAN'), ('student_003', 'Mona Adel', 'prime5', TRUE, 'EGYPTIAN'),
('student_004', 5, 'SAUDI') ('student_004', 'Omar Youssef', 'prime6', FALSE, 'SAUDI')
ON CONFLICT (student_id) DO NOTHING; ON CONFLICT (student_id) DO NOTHING;
""" """
...@@ -57,7 +59,7 @@ conn.autocommit = True ...@@ -57,7 +59,7 @@ conn.autocommit = True
with conn.cursor() as cur: with conn.cursor() as cur:
# Drop all existing tables (uncomment if needed) # Drop all existing tables (uncomment if needed)
#cur.execute(drop_all_tables_sql) cur.execute(drop_all_tables_sql)
cur.execute(schema_sql) cur.execute(schema_sql)
# Verifications: Select from students and chat_history tables # Verifications: Select from students and chat_history tables
......
import os import os
import psycopg2 import psycopg2
from psycopg2.extras import RealDictCursor from psycopg2.extras import RealDictCursor
from typing import List, Dict, Optional from typing import List, Dict, Optional, Tuple
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -28,6 +28,39 @@ class ChatDatabaseService: ...@@ -28,6 +28,39 @@ class ChatDatabaseService:
result = cur.fetchone() result = cur.fetchone()
return result["nationality"] if result else None return result["nationality"] if result else None
def get_student_info(self, student_id: str) -> Optional[Dict]:
    """Get complete student information from database.

    Args:
        student_id: Value matched against the ``students.student_id`` column.

    Returns:
        A dict with the keys ``student_id``, ``student_name``, ``grade``,
        ``is_arabic`` and ``nationality``, or ``None`` when no row matches.
    """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            """
            SELECT student_id, student_name, grade, language, nationality
            FROM students
            WHERE student_id = %s
            """,
            (student_id,)
        )
        result = cur.fetchone()

    if not result:
        return None

    return {
        'student_id': result['student_id'],
        # Included so the result really is the complete row the docstring promises.
        'student_name': result['student_name'],
        'grade': result['grade'],
        # The boolean `language` column is exposed to callers as `is_arabic`.
        'is_arabic': result['language'],
        'nationality': result['nationality'],
    }
def get_student_grade_and_language(self, student_id: str) -> Optional[Tuple[str, bool]]:
    """Get student grade and language preference.

    Args:
        student_id: Value matched against the ``students.student_id`` column.

    Returns:
        ``(grade, language)`` for the matching row, or ``None`` when no row
        matches. ``grade`` is the stored grade label (a VARCHAR such as
        ``'prime4'``), not a number; ``language`` is the stored boolean flag.
        Both columns are nullable, so either element may be ``None``.
    """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            "SELECT grade, language FROM students WHERE student_id = %s",
            (student_id,)
        )
        result = cur.fetchone()

    if not result:
        return None
    return (result["grade"], result["language"])
def get_chat_history(self, student_id: str, limit: int = 20) -> List[Dict[str, str]]: def get_chat_history(self, student_id: str, limit: int = 20) -> List[Dict[str, str]]:
"""Get chat history for a student, returns in chronological order""" """Get chat history for a student, returns in chronological order"""
with self.conn.cursor(cursor_factory=RealDictCursor) as cur: with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
...@@ -85,6 +118,51 @@ class ChatDatabaseService: ...@@ -85,6 +118,51 @@ class ChatDatabaseService:
) )
self.conn.commit() self.conn.commit()
def update_student_info(self, student_id: str, grade: Optional[str] = None,
                        language: Optional[bool] = None, nationality: Optional[str] = None,
                        student_name: Optional[str] = None):
    """Update student information.

    Only the fields passed as non-``None`` are written; when every field is
    ``None`` no statement is executed.

    Args:
        student_id: Identifies the row to update (``students.student_id``).
        grade: New grade label; the column is a VARCHAR (e.g. ``'prime4'``).
        language: New value for the boolean ``language`` column.
        nationality: New nationality value.
        student_name: New student name.
    """
    # Column names come from this fixed tuple, never from caller input, so
    # interpolating them into the SQL text is safe; values stay parameterized.
    candidates = (
        ("grade", grade),
        ("language", language),
        ("nationality", nationality),
        ("student_name", student_name),
    )
    provided = [(column, value) for column, value in candidates if value is not None]
    if not provided:
        return

    assignments = ", ".join(f"{column} = %s" for column, _ in provided)
    params = [value for _, value in provided]
    params.append(student_id)  # binds the WHERE placeholder, which comes last

    with self.conn.cursor() as cur:
        cur.execute(
            f"""
            UPDATE students
            SET {assignments}
            WHERE student_id = %s
            """,
            params
        )
    self.conn.commit()
def create_student(self, student_id: str, student_name: str, grade: str,
                   language: bool, nationality: str = 'EGYPTIAN'):
    """Insert a student row; an existing ``student_id`` is left untouched.

    Args:
        student_id: Unique student identifier.
        student_name: Student's name.
        grade: Grade label stored as text.
        language: Value for the boolean ``language`` column.
        nationality: Nationality value; defaults to ``'EGYPTIAN'``.
    """
    insert_sql = """
        INSERT INTO students (student_id, student_name, grade, language, nationality)
        VALUES (%s, %s, %s, %s, %s)
        ON CONFLICT (student_id) DO NOTHING;
        """
    row_values = (student_id, student_name, grade, language, nationality)

    with self.conn.cursor() as cursor:
        cursor.execute(insert_sql, row_values)
    self.conn.commit()
def close(self): def close(self):
if self.conn: if self.conn:
self.conn.close() self.conn.close()
\ No newline at end of file
import os import os
import psycopg2 import psycopg2
from psycopg2.extras import RealDictCursor from psycopg2.extras import RealDictCursor
from typing import List, Optional
class PGVectorService: class PGVectorService:
...@@ -41,6 +42,106 @@ class PGVectorService: ...@@ -41,6 +42,106 @@ class PGVectorService:
) )
return cur.fetchall() return cur.fetchall()
def search_filtered_nearest(
    self,
    query_embedding: list,
    grade: int,
    subject: str,
    is_arabic: bool,
    limit: int = 3
):
    """Return the closest chunks that match grade, subject and language.

    Args:
        query_embedding: Embedding compared against each row's ``embedding``
            with the ``<->`` operator.
        grade: Exact value required in the ``grade`` column.
        subject: Substring matched case-insensitively (``ILIKE``) against
            the ``subject`` column.
        is_arabic: Exact value required in the ``is_arabic`` column.
        limit: Maximum number of rows to return.

    Returns:
        Rows from ``educational_chunks`` ordered by ascending distance, each
        carrying the computed value under ``distance``.
    """
    subject_pattern = f"%{subject}%"
    sql = """
        SELECT id, grade, subject, unit, concept, lesson, chunk_text,
        is_arabic, embedding <-> %s AS distance
        FROM educational_chunks
        WHERE grade = %s
        AND subject ILIKE %s
        AND is_arabic = %s
        ORDER BY embedding <-> %s
        LIMIT %s;
        """
    # The embedding is bound twice: once for the SELECT list, once for ORDER BY.
    bound_values = (
        query_embedding,
        grade,
        subject_pattern,
        is_arabic,
        query_embedding,
        limit,
    )
    with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
        cursor.execute(sql, bound_values)
        rows = cursor.fetchall()
    return rows
def search_flexible_filtered_nearest(
    self,
    query_embedding: list,
    grade: Optional[int] = None,
    subject: Optional[str] = None,
    is_arabic: Optional[bool] = None,
    limit: int = 3
):
    """Return the closest chunks, applying only the filters that are given.

    Args:
        query_embedding: Embedding compared against each row's ``embedding``
            with the ``<->`` operator.
        grade: When not ``None``, exact value required in ``grade``.
        subject: When not ``None``, substring matched case-insensitively
            (``ILIKE``) against ``subject``.
        is_arabic: When not ``None``, exact value required in ``is_arabic``.
        limit: Maximum number of rows to return.

    Returns:
        Rows from ``educational_chunks`` ordered by ascending distance, each
        carrying the computed value under ``distance``.
    """
    # Each optional filter is a (SQL fragment, bound value) pair; a value of
    # None means the caller did not ask for that filter.
    optional_filters = (
        ("grade = %s", grade),
        ("subject ILIKE %s", None if subject is None else f"%{subject}%"),
        ("is_arabic = %s", is_arabic),
    )
    active = [(fragment, value) for fragment, value in optional_filters if value is not None]

    where_clause = ("WHERE " + " AND ".join(fragment for fragment, _ in active)) if active else ""

    # Placeholder order: SELECT-list embedding, filter values, ORDER BY
    # embedding, then the row limit.
    params = [query_embedding]
    params.extend(value for _, value in active)
    params.extend((query_embedding, limit))

    with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
        cursor.execute(
            f"""
            SELECT id, grade, subject, unit, concept, lesson, chunk_text,
            is_arabic, embedding <-> %s AS distance
            FROM educational_chunks
            {where_clause}
            ORDER BY embedding <-> %s
            LIMIT %s;
            """,
            params
        )
        rows = cursor.fetchall()
    return rows
def get_subjects_by_grade_and_language(self, grade: str, is_arabic: bool) -> List[str]:
    """Get available subjects for a specific grade and language.

    Args:
        grade: Grade label such as ``'prime4'``; the first run of decimal
            digits in it is used as the numeric grade. When it is empty,
            ``None`` or contains no digits, the grade filter is dropped.
        is_arabic: Exact value required in the ``is_arabic`` column.

    Returns:
        Distinct ``subject`` values from ``educational_chunks``, sorted by
        subject.
    """
    import re  # local import: only this method needs it

    # Take the first contiguous digit run instead of gluing every digit in
    # the label together ("4-2" must not become 42). `\d` only matches
    # decimal digits, so the match is always accepted by int(), unlike
    # str.isdigit() which also admits characters such as '²'.
    digits = re.search(r"\d+", str(grade)) if grade else None

    if digits:
        sql = """
            SELECT DISTINCT subject
            FROM educational_chunks
            WHERE grade = %s AND is_arabic = %s
            ORDER BY subject;
            """
        params = (int(digits.group()), is_arabic)
    else:
        # Fallback if grade parsing fails: filter by language only.
        sql = """
            SELECT DISTINCT subject
            FROM educational_chunks
            WHERE is_arabic = %s
            ORDER BY subject;
            """
        params = (is_arabic,)

    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, params)
        return [row['subject'] for row in cur.fetchall()]
def close(self): def close(self):
if self.conn: if self.conn:
self.conn.close() self.conn.close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment