Files
ai7-m3/scheduler_bots/verify_db.py
2026-02-05 10:15:09 +03:00

232 lines
7.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
"""
verify_db.py - Verification script for the school schedule database
Checks data quality in teachers, groups, and students tables
"""
import sqlite3
import re
def connect_db(db_name='school_schedule.db'):
"""Connect to the database"""
conn = sqlite3.connect(db_name)
cursor = conn.cursor()
return conn, cursor
def check_teachers_table(cursor):
"""Check the teachers table for data quality issues"""
print("Checking teachers table...")
cursor.execute("SELECT COUNT(*) FROM teachers")
total_count = cursor.fetchone()[0]
print(f"Total teachers: {total_count}")
# Find teachers with default names
cursor.execute("SELECT name FROM teachers WHERE name LIKE '%Default Teacher%' OR name LIKE '%Неизвестный%'")
default_teachers = cursor.fetchall()
print(f"Teachers with default names: {len(default_teachers)}")
for teacher in default_teachers:
print(f" - {teacher[0]}")
# Find potentially invalid teacher names
invalid_teachers = []
cursor.execute("SELECT name FROM teachers")
all_teachers = cursor.fetchall()
for (teacher_name,) in all_teachers:
if not is_valid_teacher_name(teacher_name):
invalid_teachers.append(teacher_name)
print(f"Potentially invalid teacher names: {len(invalid_teachers)}")
for teacher in invalid_teachers:
print(f" - {teacher}")
print()
def is_valid_teacher_name(name):
"""Check if a name looks like a valid teacher name"""
# Skip default names as they're intentionally different
if 'Default Teacher' in name or 'Неизвестный' in name:
return True # Considered valid as intentional placeholders
# Check for common invalid patterns
invalid_patterns = [
r'^\d+[А-ЯA-Z]$', # Class pattern like "8А", "11B"
r'^[А-ЯЁA-Z]\d+\s+[А-ЯЁA-Z]\d+$', # "E4 E5" pattern
r'.*[Gg]roup.*', # Group identifiers
r'.*[Tt]rack.*', # Track identifiers
r'^[А-ЯЁA-Z]\d+$', # Single group identifiers like "E4"
r'.*[Cc]lub.*', # Club identifiers
]
for pattern in invalid_patterns:
if re.match(pattern, name, re.IGNORECASE):
return False
# Valid teacher name patterns
valid_patterns = [
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+', # Russian names
r'^[A-Z][a-z]+\s+[A-Z][a-z]+', # English names
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.', # Name with initial
r'^[A-Z][a-z]+\s+[A-Z]\.', # Name with initial (English)
]
for pattern in valid_patterns:
if re.match(pattern, name):
return True
# If it's a reasonably long string with spaces and proper capitalization
parts = name.split()
if len(parts) >= 2 and len(name) >= 5:
# Check if parts start with capital letters
if all(len(part) > 0 and part[0].isupper() for part in parts):
return True
return False
def check_groups_table(cursor):
"""Check the groups table for data quality issues"""
print("Checking groups table...")
cursor.execute("SELECT COUNT(*) FROM groups")
total_count = cursor.fetchone()[0]
print(f"Total groups: {total_count}")
# Get all group names
cursor.execute("SELECT name FROM groups")
all_groups = cursor.fetchall()
# Check for potential student names in group names
potential_student_names = []
for (group_name,) in all_groups:
if looks_like_student_name(group_name):
potential_student_names.append(group_name)
print(f"Groups that look like student names: {len(potential_student_names)}")
for group in potential_student_names[:10]: # Show first 10
print(f" - {group}")
print()
def looks_like_student_name(name):
"""Check if a name looks like a student name instead of a group"""
# Class patterns like "8А", "11B" are OK as groups
class_pattern = r'^\d+[А-ЯA-Z]$'
if re.match(class_pattern, name):
return False
# Student names typically follow name patterns
name_pattern = r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+' # Russian name
if re.match(name_pattern, name):
return True
name_pattern = r'^[A-Z][a-z]+\s+[A-Z][a-z]+' # English name
if re.match(name_pattern, name):
return True
# If it contains common group identifiers, it's likely a valid group
group_indicators = ['club', 'track', 'group', 'module', '-', 'class']
if any(indicator in name.lower() for indicator in group_indicators):
return False
return False
def check_students_table(cursor):
"""Check the students table"""
print("Checking students table...")
cursor.execute("SELECT COUNT(*) FROM students")
total_count = cursor.fetchone()[0]
print(f"Total students: {total_count}")
# Get sample students
cursor.execute("SELECT full_name, class_name FROM students LIMIT 5")
samples = cursor.fetchall()
print("Sample students:")
for student in samples:
print(f" - {student[0]} (Class: {student[1]})")
print()
def check_schedule_integrity(cursor):
"""Check the schedule table for data consistency"""
print("Checking schedule table integrity...")
# Count total schedule entries
cursor.execute("SELECT COUNT(*) FROM schedule")
total_schedules = cursor.fetchone()[0]
print(f"Total schedule entries: {total_schedules}")
# Count entries with valid relationships
cursor.execute("""
SELECT COUNT(*)
FROM schedule s
JOIN students st ON s.student_id = st.student_id
JOIN subjects su ON s.subject_id = su.subject_id
JOIN teachers t ON s.teacher_id = t.teacher_id
JOIN groups g ON s.group_id = g.group_id
""")
valid_relationships = cursor.fetchone()[0]
print(f"Schedules with valid relationships: {valid_relationships}")
# Check for orphaned records
print("Checking for orphaned records...")
# Students in schedule but not in students table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN students st ON s.student_id = st.student_id
WHERE st.student_id IS NULL
""")
orphaned_students = cursor.fetchone()[0]
print(f"Orphaned student references: {orphaned_students}")
# Subjects in schedule but not in subjects table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN subjects su ON s.subject_id = su.subject_id
WHERE su.subject_id IS NULL
""")
orphaned_subjects = cursor.fetchone()[0]
print(f"Orphaned subject references: {orphaned_subjects}")
# Teachers in schedule but not in teachers table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN teachers t ON s.teacher_id = t.teacher_id
WHERE t.teacher_id IS NULL
""")
orphaned_teachers = cursor.fetchone()[0]
print(f"Orphaned teacher references: {orphaned_teachers}")
# Groups in schedule but not in groups table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN groups g ON s.group_id = g.group_id
WHERE g.group_id IS NULL
""")
orphaned_groups = cursor.fetchone()[0]
print(f"Orphaned group references: {orphaned_groups}")
print()
def main():
"""Main function to run all checks"""
print("School Schedule Database Verification")
print("="*40)
try:
conn, cursor = connect_db()
check_teachers_table(cursor)
check_groups_table(cursor)
check_students_table(cursor)
check_schedule_integrity(cursor)
conn.close()
print("Verification complete!")
except Exception as e:
print(f"Error during verification: {str(e)}")
if __name__ == "__main__":
main()