handle vector database

parent 90af444c
......@@ -8,7 +8,7 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .
#just keep the container running without doing anything
#CMD ["sh", "-c", "while :; do sleep 10; done"]
CMD ["sh", "-c", "while :; do sleep 10; done"]
#run the app automatically when the container starts
CMD ["python", "main.py"]
#CMD ["python", "main.py"]
......@@ -6,7 +6,9 @@ schema_sql = """
CREATE TABLE IF NOT EXISTS students (
id SERIAL PRIMARY KEY,
student_id VARCHAR(50) UNIQUE NOT NULL,
grade INTEGER,
student_name VARCHAR(100),
grade VARCHAR(20),
language BOOLEAN,
nationality VARCHAR(20) NOT NULL DEFAULT 'EGYPTIAN'
);
......@@ -26,11 +28,11 @@ CREATE INDEX IF NOT EXISTS idx_chat_history_created_at ON chat_history(created_a
CREATE INDEX IF NOT EXISTS idx_students_nationality ON students(nationality);
-- Insert dummy data for testing
INSERT INTO students (student_id, grade, nationality) VALUES
('student_001', 3, 'EGYPTIAN'),
('student_002', 4, 'SAUDI'),
('student_003', 2, 'EGYPTIAN'),
('student_004', 5, 'SAUDI')
INSERT INTO students (student_id, student_name, grade, language, nationality) VALUES
('student_001', 'Ahmed Ali', 'prime4', TRUE, 'EGYPTIAN'),
('student_002', 'Sara Hassan', 'prime6', FALSE, 'SAUDI'),
('student_003', 'Mona Adel', 'prime5', TRUE, 'EGYPTIAN'),
('student_004', 'Omar Youssef', 'prime6', FALSE, 'SAUDI')
ON CONFLICT (student_id) DO NOTHING;
"""
......@@ -57,7 +59,7 @@ conn.autocommit = True
with conn.cursor() as cur:
# Drop all existing tables (uncomment if needed)
#cur.execute(drop_all_tables_sql)
cur.execute(drop_all_tables_sql)
cur.execute(schema_sql)
# Verifications: Select from students and chat_history tables
......
import os
import psycopg2
from psycopg2.extras import RealDictCursor
from typing import List, Dict, Optional
from typing import List, Dict, Optional, Tuple
import logging
logger = logging.getLogger(__name__)
......@@ -28,6 +28,39 @@ class ChatDatabaseService:
result = cur.fetchone()
return result["nationality"] if result else None
def get_student_info(self, student_id: str) -> Optional[Dict]:
    """Look up a single student's profile by ``student_id``.

    Returns a dict with the keys ``student_id``, ``grade``, ``is_arabic``
    and ``nationality``, or ``None`` when no matching row is found.
    """
    query = """
        SELECT student_id, grade, language, nationality
        FROM students
        WHERE student_id = %s
    """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (student_id,))
        row = cur.fetchone()

    if not row:
        return None

    # The ``language`` column is handed to callers under the key ``is_arabic``.
    return {
        'student_id': row['student_id'],
        'grade': row['grade'],
        'is_arabic': row['language'],
        'nationality': row['nationality'],
    }
def get_student_grade_and_language(self, student_id: str) -> Optional[Tuple[str, bool]]:
    """Return ``(grade, language)`` for a student, or ``None`` if not found.

    Both values are returned exactly as stored in the ``students`` row.
    ``grade`` is annotated as ``str`` because ``create_student`` writes it
    as a string.
    NOTE(review): the students schema in this change declares ``grade`` as
    VARCHAR(20) (e.g. ``'prime4'``) -- confirm no caller still expects an int.
    """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            "SELECT grade, language FROM students WHERE student_id = %s",
            (student_id,)
        )
        row = cur.fetchone()
    if not row:
        return None
    return (row["grade"], row["language"])
def get_chat_history(self, student_id: str, limit: int = 20) -> List[Dict[str, str]]:
"""Get chat history for a student, returns in chronological order"""
with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
......@@ -85,6 +118,51 @@ class ChatDatabaseService:
)
self.conn.commit()
def update_student_info(self, student_id: str, grade: Optional[str] = None,
                        language: Optional[bool] = None, nationality: Optional[str] = None,
                        student_name: Optional[str] = None):
    """Update the given fields of one student row.

    Only arguments that are not ``None`` are written; when every field is
    ``None`` no statement is executed and nothing is committed.

    Args:
        student_id: Value matched against ``students.student_id``.
        grade: New grade value (stored as text, like in ``create_student``).
        language: New language flag.
        nationality: New nationality.
        student_name: New student name.

    Raises:
        Any exception raised while executing or committing is re-raised
        after the transaction has been rolled back.
    """
    # Column names come only from this fixed mapping, never from caller
    # input, so formatting them into the statement is safe. The values
    # themselves always travel as bound parameters.
    candidate_values = {
        "grade": grade,
        "language": language,
        "nationality": nationality,
        "student_name": student_name,
    }
    changes = {
        column: value
        for column, value in candidate_values.items()
        if value is not None
    }
    if not changes:
        return

    assignments = ", ".join(f"{column} = %s" for column in changes)
    params = [*changes.values(), student_id]
    try:
        with self.conn.cursor() as cur:
            cur.execute(
                f"""
                UPDATE students
                SET {assignments}
                WHERE student_id = %s
                """,
                params
            )
        self.conn.commit()
    except Exception:
        # Without a rollback a failed statement leaves the shared connection
        # inside an aborted transaction and every later query on it fails.
        self.conn.rollback()
        raise
def create_student(self, student_id: str, student_name: str, grade: str,
                   language: bool, nationality: str = 'EGYPTIAN'):
    """Insert a new student row; an existing ``student_id`` is left untouched.

    The insert uses ``ON CONFLICT (student_id) DO NOTHING``, so calling this
    for a student that already exists does not modify the stored row and
    does not raise.

    Args:
        student_id: Unique identifier of the student.
        student_name: Name of the student.
        grade: Grade value, stored as given.
        language: Language flag, stored as given.
        nationality: Nationality, ``'EGYPTIAN'`` when omitted.

    Raises:
        Any exception raised while executing or committing is re-raised
        after the transaction has been rolled back.
    """
    try:
        with self.conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO students (student_id, student_name, grade, language, nationality)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (student_id) DO NOTHING;
                """,
                (student_id, student_name, grade, language, nationality)
            )
        self.conn.commit()
    except Exception:
        # Without a rollback a failed statement leaves the shared connection
        # inside an aborted transaction and every later query on it fails.
        self.conn.rollback()
        raise
def close(self):
    """Close the database connection held in ``self.conn``, if there is one."""
    connection = self.conn
    if not connection:
        return
    connection.close()
\ No newline at end of file
import os
import psycopg2
from psycopg2.extras import RealDictCursor
from typing import List, Optional
class PGVectorService:
......@@ -41,6 +42,106 @@ class PGVectorService:
)
return cur.fetchall()
def search_filtered_nearest(
    self,
    query_embedding: list,
    grade: int,
    subject: str,
    is_arabic: bool,
    limit: int = 3
):
    """Return the nearest chunks to ``query_embedding`` that pass every filter.

    A row of ``educational_chunks`` qualifies when its ``grade`` equals
    ``grade``, its ``subject`` contains ``subject`` (``ILIKE '%subject%'``)
    and its ``is_arabic`` equals ``is_arabic``. Qualifying rows are ordered
    by ``embedding <-> query_embedding`` and at most ``limit`` are returned,
    each carrying that value in a ``distance`` column.
    """
    subject_pattern = f"%{subject}%"
    sql = """
        SELECT id, grade, subject, unit, concept, lesson, chunk_text,
        is_arabic, embedding <-> %s AS distance
        FROM educational_chunks
        WHERE grade = %s
        AND subject ILIKE %s
        AND is_arabic = %s
        ORDER BY embedding <-> %s
        LIMIT %s;
    """
    # The embedding is bound twice: once for the selected distance and once
    # for the ORDER BY expression.
    bind_values = (
        query_embedding,
        grade,
        subject_pattern,
        is_arabic,
        query_embedding,
        limit,
    )
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, bind_values)
        return cur.fetchall()
def search_flexible_filtered_nearest(
    self,
    query_embedding: list,
    grade: Optional[int] = None,
    subject: Optional[str] = None,
    is_arabic: Optional[bool] = None,
    limit: int = 3
):
    """Return the nearest chunks to ``query_embedding`` with optional filters.

    Each of ``grade``, ``subject`` and ``is_arabic`` narrows the search only
    when it is not ``None``; with all three left out no WHERE clause is
    emitted. ``subject`` is matched as ``ILIKE '%subject%'``. Rows are
    ordered by ``embedding <-> query_embedding`` and capped at ``limit``.
    """
    optional_filters = (
        ("grade = %s", grade),
        ("subject ILIKE %s", None if subject is None else f"%{subject}%"),
        ("is_arabic = %s", is_arabic),
    )
    active_filters = [
        (clause, value) for clause, value in optional_filters if value is not None
    ]

    where_clause = (
        "WHERE " + " AND ".join(clause for clause, _ in active_filters)
        if active_filters
        else ""
    )
    # Placeholder order in the statement: SELECT distance, the WHERE filters,
    # ORDER BY distance, then LIMIT.
    params = [
        query_embedding,
        *(value for _, value in active_filters),
        query_embedding,
        limit,
    ]

    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            f"""
            SELECT id, grade, subject, unit, concept, lesson, chunk_text,
            is_arabic, embedding <-> %s AS distance
            FROM educational_chunks
            {where_clause}
            ORDER BY embedding <-> %s
            LIMIT %s;
            """,
            params
        )
        return cur.fetchall()
def get_subjects_by_grade_and_language(self, grade: str, is_arabic: bool) -> List[str]:
    """List the distinct subjects available for a grade and language.

    The decimal digits found in ``grade`` (e.g. ``'prime4'`` -> ``4``) are
    used as the numeric grade to filter ``educational_chunks``. When
    ``grade`` is empty or contains no digits, the grade filter is dropped
    and subjects are filtered by ``is_arabic`` only.

    Args:
        grade: Grade label containing the grade number; an integer is also
            accepted and is converted with ``str()``.
        is_arabic: Value the ``is_arabic`` column must equal.

    Returns:
        Subject names sorted by the database (``ORDER BY subject``).
    """
    # str.isdecimal rather than str.isdigit: isdigit also accepts characters
    # such as superscript digits that int() rejects, which would raise
    # instead of taking the fallback query.
    digits = ''.join(ch for ch in str(grade) if ch.isdecimal()) if grade else ''

    if digits:
        query = """
            SELECT DISTINCT subject
            FROM educational_chunks
            WHERE grade = %s AND is_arabic = %s
            ORDER BY subject;
        """
        params = (int(digits), is_arabic)
    else:
        # Fallback when no grade number could be extracted.
        query = """
            SELECT DISTINCT subject
            FROM educational_chunks
            WHERE is_arabic = %s
            ORDER BY subject;
        """
        params = (is_arabic,)

    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, params)
        return [row['subject'] for row in cur.fetchall()]
def close(self):
    """Release the database connection if this service still holds one."""
    held = self.conn
    if held:
        held.close()
    return None
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment