#!/usr/bin/env python
"""
verify_db.py - Verification script for the school schedule database

Checks data quality in the teachers, groups, and students tables and the
referential integrity of the schedule table. All results are printed to
standard output; nothing in the database is modified.
"""

import sqlite3
import re

# Substrings that mark an intentional placeholder teacher name.
_DEFAULT_NAME_MARKERS = ('Default Teacher', 'Неизвестный')

# Strings that are clearly NOT a teacher name (matched case-insensitively).
_INVALID_TEACHER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'^\d+[А-ЯA-Z]$',                    # Class pattern like "8А", "11B"
        r'^[А-ЯЁA-Z]\d+\s+[А-ЯЁA-Z]\d+$',    # "E4 E5" pattern
        r'.*[Gg]roup.*',                     # Group identifiers
        r'.*[Tt]rack.*',                     # Track identifiers
        r'^[А-ЯЁA-Z]\d+$',                   # Single group identifiers like "E4"
        r'.*[Cc]lub.*',                      # Club identifiers
    )
)

# Strings that look like a teacher name (matched case-sensitively).
_VALID_TEACHER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+',   # Russian names
        r'^[A-Z][a-z]+\s+[A-Z][a-z]+',       # English names
        r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.',        # Name with initial
        r'^[A-Z][a-z]+\s+[A-Z]\.',           # Name with initial (English)
    )
)

# Class labels such as "8А" or "11B" are legitimate group names.
_CLASS_PATTERN = re.compile(r'^\d+[А-ЯA-Z]$')

# "Firstname Lastname" shapes used to spot person names among group names.
_PERSON_NAME_PATTERNS = (
    re.compile(r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+'),  # Russian name
    re.compile(r'^[A-Z][a-z]+\s+[A-Z][a-z]+'),      # English name
)

# Substrings (compared against the lower-cased name) that mark a real group.
_GROUP_INDICATORS = ('club', 'track', 'group', 'module', '-', 'class')

# (label used in the report, referenced table, shared key column).
# These are hard-coded identifiers, never user input, so interpolating them
# into SQL text below is safe.
_SCHEDULE_REFERENCES = (
    ('student', 'students', 'student_id'),
    ('subject', 'subjects', 'subject_id'),
    ('teacher', 'teachers', 'teacher_id'),
    ('group', 'groups', 'group_id'),
)


def _fetch_scalar(cursor, sql):
    """Execute *sql* and return the first column of the first result row."""
    cursor.execute(sql)
    return cursor.fetchone()[0]


def connect_db(db_name='school_schedule.db'):
    """Connect to the database.

    Args:
        db_name: Path of the SQLite database file to open.

    Returns:
        A ``(connection, cursor)`` tuple. The caller is responsible for
        closing the connection.
    """
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    return conn, cursor


def check_teachers_table(cursor):
    """Check the teachers table for data quality issues.

    Prints the total row count, the teachers carrying a placeholder name,
    and every name rejected by ``is_valid_teacher_name``.

    Args:
        cursor: Open cursor on a database containing a ``teachers`` table
            with a ``name`` column.
    """
    print("Checking teachers table...")

    total_count = _fetch_scalar(cursor, "SELECT COUNT(*) FROM teachers")
    print(f"Total teachers: {total_count}")

    # Find teachers with default names
    cursor.execute(
        "SELECT name FROM teachers "
        "WHERE name LIKE '%Default Teacher%' OR name LIKE '%Неизвестный%'"
    )
    default_teachers = cursor.fetchall()
    print(f"Teachers with default names: {len(default_teachers)}")
    for (teacher_name,) in default_teachers:
        print(f"  - {teacher_name}")

    # Find potentially invalid teacher names (NULL names are reported too)
    cursor.execute("SELECT name FROM teachers")
    invalid_teachers = [
        teacher_name
        for (teacher_name,) in cursor.fetchall()
        if not is_valid_teacher_name(teacher_name)
    ]
    print(f"Potentially invalid teacher names: {len(invalid_teachers)}")
    for teacher_name in invalid_teachers:
        print(f"  - {teacher_name}")

    print()


def is_valid_teacher_name(name):
    """Check if a name looks like a valid teacher name.

    Args:
        name: Value read from the ``name`` column; may be ``None`` when the
            column is NULL.

    Returns:
        ``True`` for placeholder names and for strings shaped like a
        person's name, ``False`` for class/group/club/track identifiers,
        for anything else that is not name-like, and for non-string values.
    """
    # A NULL (or otherwise non-text) name cannot be a valid teacher name;
    # report it instead of raising TypeError on the substring tests below.
    if not isinstance(name, str):
        return False

    # Default names are intentional placeholders, so they count as valid.
    if any(marker in name for marker in _DEFAULT_NAME_MARKERS):
        return True

    # Rejections are checked first: "Chess Club" also has the shape of a
    # two-word English name and must not be accepted by the patterns below.
    if any(pattern.match(name) for pattern in _INVALID_TEACHER_PATTERNS):
        return False

    if any(pattern.match(name) for pattern in _VALID_TEACHER_PATTERNS):
        return True

    # Fallback: at least two whitespace-separated words, each capitalised,
    # in a string of five or more characters.
    parts = name.split()
    return (
        len(parts) >= 2
        and len(name) >= 5
        and all(part[0].isupper() for part in parts)
    )


def check_groups_table(cursor):
    """Check the groups table for data quality issues.

    Prints the total row count and up to ten group names that look like
    student names.

    Args:
        cursor: Open cursor on a database containing a ``groups`` table
            with a ``name`` column.
    """
    print("Checking groups table...")

    total_count = _fetch_scalar(cursor, "SELECT COUNT(*) FROM groups")
    print(f"Total groups: {total_count}")

    # Check for potential student names in group names
    cursor.execute("SELECT name FROM groups")
    potential_student_names = [
        group_name
        for (group_name,) in cursor.fetchall()
        if looks_like_student_name(group_name)
    ]

    print(f"Groups that look like student names: {len(potential_student_names)}")
    for group_name in potential_student_names[:10]:  # Show first 10
        print(f"  - {group_name}")

    print()


def looks_like_student_name(name):
    """Check if a name looks like a student name instead of a group.

    Args:
        name: Value read from the ``name`` column; may be ``None`` when the
            column is NULL.

    Returns:
        ``True`` when the string starts with a "Firstname Lastname" shape
        (Russian or English) and carries no group indicator; ``False``
        otherwise, including for non-string values.
    """
    if not isinstance(name, str):
        return False

    # Class patterns like "8А", "11B" are OK as groups
    if _CLASS_PATTERN.match(name):
        return False

    # Group indicators must be tested BEFORE the name patterns; otherwise a
    # group such as "Chess Club" is reported as a student name.
    # NOTE: '-' is one of the indicators, so hyphenated person names are
    # treated as groups by this heuristic.
    lowered = name.lower()
    if any(indicator in lowered for indicator in _GROUP_INDICATORS):
        return False

    # Student names typically follow name patterns
    return any(pattern.match(name) for pattern in _PERSON_NAME_PATTERNS)


def check_students_table(cursor):
    """Check the students table.

    Prints the total row count and the first five rows as a sample.

    Args:
        cursor: Open cursor on a database containing a ``students`` table
            with ``full_name`` and ``class_name`` columns.
    """
    print("Checking students table...")

    total_count = _fetch_scalar(cursor, "SELECT COUNT(*) FROM students")
    print(f"Total students: {total_count}")

    # Get sample students
    cursor.execute("SELECT full_name, class_name FROM students LIMIT 5")
    print("Sample students:")
    for full_name, class_name in cursor.fetchall():
        print(f"  - {full_name} (Class: {class_name})")

    print()


def check_schedule_integrity(cursor):
    """Check the schedule table for data consistency.

    Prints the number of schedule rows, the number of rows whose student,
    subject, teacher, and group references all resolve, and the number of
    unresolved references for each of those four tables.

    Args:
        cursor: Open cursor on a database containing ``schedule``,
            ``students``, ``subjects``, ``teachers``, and ``groups`` tables
            joined on ``student_id``, ``subject_id``, ``teacher_id``, and
            ``group_id``.
    """
    print("Checking schedule table integrity...")

    # Count total schedule entries
    total_schedules = _fetch_scalar(cursor, "SELECT COUNT(*) FROM schedule")
    print(f"Total schedule entries: {total_schedules}")

    # Count entries with valid relationships (inner joins drop any row
    # with at least one unresolved reference)
    valid_relationships = _fetch_scalar(cursor, """
        SELECT COUNT(*)
        FROM schedule s
        JOIN students st ON s.student_id = st.student_id
        JOIN subjects su ON s.subject_id = su.subject_id
        JOIN teachers t ON s.teacher_id = t.teacher_id
        JOIN groups g ON s.group_id = g.group_id
    """)
    print(f"Schedules with valid relationships: {valid_relationships}")

    # Check for orphaned records: schedule rows whose key has no match in
    # the referenced table (a NULL key in schedule is counted as well).
    print("Checking for orphaned records...")
    for label, table, key in _SCHEDULE_REFERENCES:
        orphaned = _fetch_scalar(cursor, f"""
            SELECT COUNT(*)
            FROM schedule s
            LEFT JOIN {table} ref ON s.{key} = ref.{key}
            WHERE ref.{key} IS NULL
        """)
        print(f"Orphaned {label} references: {orphaned}")

    print()


def main(db_name='school_schedule.db'):
    """Main function to run all checks.

    Any exception raised while connecting or checking is reported on
    standard output rather than propagated.

    Args:
        db_name: Path of the SQLite database file to verify.
    """
    print("School Schedule Database Verification")
    print("=" * 40)

    conn = None
    try:
        conn, cursor = connect_db(db_name)

        check_teachers_table(cursor)
        check_groups_table(cursor)
        check_students_table(cursor)
        check_schedule_integrity(cursor)

        print("Verification complete!")
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"Error during verification: {e}")
    finally:
        # Close the connection even when one of the checks raised.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()