grade as an int

parent dcae36f9
......@@ -7,7 +7,7 @@ CREATE TABLE IF NOT EXISTS students (
id SERIAL PRIMARY KEY,
student_id VARCHAR(50) UNIQUE NOT NULL,
student_name VARCHAR(100),
grade VARCHAR(20),
grade INTEGER NOT NULL,
language BOOLEAN,
nationality VARCHAR(20) NOT NULL DEFAULT 'EGYPTIAN'
);
......@@ -26,13 +26,15 @@ CREATE TABLE IF NOT EXISTS chat_history (
CREATE INDEX IF NOT EXISTS idx_chat_history_student_id ON chat_history(student_id);
CREATE INDEX IF NOT EXISTS idx_chat_history_created_at ON chat_history(created_at);
CREATE INDEX IF NOT EXISTS idx_students_nationality ON students(nationality);
CREATE INDEX IF NOT EXISTS idx_students_grade ON students(grade);
CREATE INDEX IF NOT EXISTS idx_students_grade_language ON students(grade, language);
-- Insert dummy data for testing
INSERT INTO students (student_id, student_name, grade, language, nationality) VALUES
('student_001', 'Ahmed Ali', 'prime4', TRUE, 'EGYPTIAN'),
('student_002', 'Sara Hassan', 'prime6', FALSE, 'SAUDI'),
('student_003', 'Mona Adel', 'prime5', TRUE, 'EGYPTIAN'),
('student_004', 'Omar Youssef', 'prime6', FALSE, 'SAUDI')
('student_001', 'Ahmed Ali', 4, TRUE, 'EGYPTIAN'),
('student_002', 'Sara Hassan', 6, FALSE, 'SAUDI'),
('student_003', 'Mona Adel', 5, TRUE, 'EGYPTIAN'),
('student_004', 'Omar Youssef', 6, FALSE, 'SAUDI')
ON CONFLICT (student_id) DO NOTHING;
"""
......@@ -100,7 +102,7 @@ def setup_database(drop_existing_tables: bool = False):
if __name__ == "__main__":
# To run with a clean slate, pass True
# setup_database(drop_existing_tables=True)
setup_database(drop_existing_tables=True)
# To run without dropping tables (default)
setup_database()
\ No newline at end of file
#setup_database()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -26,7 +26,7 @@ def create_schema_and_table(conn, drop_existing_table: bool):
create_table = """
CREATE TABLE IF NOT EXISTS educational_chunks (
id SERIAL PRIMARY KEY,
grade TEXT NOT NULL,
grade INTEGER NOT NULL,
subject TEXT,
unit TEXT,
concept TEXT,
......@@ -64,6 +64,40 @@ def create_schema_and_table(conn, drop_existing_table: bool):
conn.commit()
cur.close()
def extract_grade_number(grade_value) -> int:
    """Extract a numeric grade from a loosely formatted value.

    Handles plain numbers (``4``, ``"4"``), labelled forms such as
    ``"grade4"``, ``"prime4"`` or ``"4th grade"`` (the first run of decimal
    digits wins), and English number words (``"four"``, ``"fourth"``).

    Args:
        grade_value: Raw grade cell; anything ``str()`` can render, or a
            missing value as recognised by ``pd.isna``.

    Returns:
        The parsed grade as an int. Missing or unparseable values fall back
        to 1; the unparseable case also prints a warning.
    """
    # Local import, as in the original: the file's top-level imports are not
    # visible from this block.
    import re

    if pd.isna(grade_value):
        return 1  # default

    grade_str = str(grade_value).lower().strip()

    # A single regex search covers both "4" and "prime4". The previous
    # `str.isdigit()` fast path was dropped: isdigit() is also True for
    # characters such as superscript digits ("²") that int() rejects, which
    # raised ValueError. `\d` only matches decimal digits int() can parse.
    digits = re.search(r'\d+', grade_str)
    if digits:
        return int(digits.group())

    # Fallback mapping for word-based grades (substring match, so
    # "primefour" and "fourth grade" both resolve).
    grade_mapping = {
        'one': 1, 'first': 1,
        'two': 2, 'second': 2,
        'three': 3, 'third': 3,
        'four': 4, 'fourth': 4,
        'five': 5, 'fifth': 5,
        'six': 6, 'sixth': 6,
    }
    for word, num in grade_mapping.items():
        if word in grade_str:
            return num

    # Final fallback
    print(f"Warning: Could not parse grade '{grade_value}', defaulting to 1")
    return 1
def insert_chunks_from_csv(csv_file: str):
df = pd.read_csv(csv_file)
......@@ -95,8 +129,11 @@ def insert_chunks_from_csv(csv_file: str):
for idx, row in df.iterrows():
try:
embedding = json.loads(row["Embedding"])
# Convert grade to integer
grade_int = extract_grade_number(row["Grade"])
buffer.append((
str(row["Grade"]),
grade_int, # Now properly converted to integer
row["Subject"],
row.get("Unit"),
row.get("Concept"),
......@@ -161,7 +198,7 @@ def setup_embeddings_database(drop_existing_tables: bool = False):
if __name__ == "__main__":
# To run with a clean slate, pass True
# setup_embeddings_database(drop_existing_tables=True)
setup_embeddings_database(drop_existing_tables=True)
# To run without dropping the table (default)
setup_embeddings_database()
\ No newline at end of file
#setup_embeddings_database()
\ No newline at end of file
......@@ -43,8 +43,8 @@ class ChatDatabaseService:
if result:
return {
'student_id': result['student_id'],
'student_name': result['student_name'], # Added this line
'grade': result['grade'],
'student_name': result['student_name'],
'grade': result['grade'], # This is now an integer
'is_arabic': result['language'], # Convert language boolean to is_arabic
'nationality': result['nationality']
}
......@@ -150,7 +150,7 @@ class ChatDatabaseService:
)
self.conn.commit()
def create_student(self, student_id: str, student_name: str, grade: str,
def create_student(self, student_id: str, student_name: str, grade: int,
language: bool, nationality: str = 'EGYPTIAN'):
"""Create a new student record"""
with self.conn.cursor() as cur:
......
......@@ -117,33 +117,18 @@ class PGVectorService:
)
return cur.fetchall()
def get_subjects_by_grade_and_language(self, grade: int, is_arabic: bool) -> List[str]:
    """Get available subjects for a specific grade and language.

    Args:
        grade: Numeric grade, compared directly against the integer
            ``educational_chunks.grade`` column.
        is_arabic: Language flag matched against ``is_arabic``.

    Returns:
        Distinct subject names for that grade and language, ordered by
        subject.
    """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        # grade is already an int, so the earlier digit-extraction step and
        # its "ignore the grade" fallback query are no longer needed.
        cur.execute(
            """
SELECT DISTINCT subject
FROM educational_chunks
WHERE grade = %s AND is_arabic = %s
ORDER BY subject;
""",
            (grade, is_arabic)
        )
        return [row['subject'] for row in cur.fetchall()]
def close(self):
......
......@@ -187,8 +187,8 @@
<script>
// Configuration
const Config = {
BACKEND_URL: "http://teamtestingdocker.caprover.al-arcade.com:8000/chat",
AUDIO_RESPONSE_URL: "http://teamtestingdocker.caprover.al-arcade.com:8000/get-audio-response"
BACKEND_URL: "http://localhost:8000/chat",
AUDIO_RESPONSE_URL: "http://localhost:8000/get-audio-response"
};
// Enums
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment