232 lines
7.8 KiB
Python
232 lines
7.8 KiB
Python
#!/usr/bin/env python
|
||
"""
|
||
verify_db.py - Verification script for the school schedule database
|
||
Checks data quality in teachers, groups, and students tables
|
||
"""
|
||
|
||
import sqlite3
|
||
import re
|
||
|
||
def connect_db(db_name='school_schedule.db'):
|
||
"""Connect to the database"""
|
||
conn = sqlite3.connect(db_name)
|
||
cursor = conn.cursor()
|
||
return conn, cursor
|
||
|
||
def check_teachers_table(cursor):
|
||
"""Check the teachers table for data quality issues"""
|
||
print("Checking teachers table...")
|
||
|
||
cursor.execute("SELECT COUNT(*) FROM teachers")
|
||
total_count = cursor.fetchone()[0]
|
||
print(f"Total teachers: {total_count}")
|
||
|
||
# Find teachers with default names
|
||
cursor.execute("SELECT name FROM teachers WHERE name LIKE '%Default Teacher%' OR name LIKE '%Неизвестный%'")
|
||
default_teachers = cursor.fetchall()
|
||
print(f"Teachers with default names: {len(default_teachers)}")
|
||
for teacher in default_teachers:
|
||
print(f" - {teacher[0]}")
|
||
|
||
# Find potentially invalid teacher names
|
||
invalid_teachers = []
|
||
cursor.execute("SELECT name FROM teachers")
|
||
all_teachers = cursor.fetchall()
|
||
for (teacher_name,) in all_teachers:
|
||
if not is_valid_teacher_name(teacher_name):
|
||
invalid_teachers.append(teacher_name)
|
||
|
||
print(f"Potentially invalid teacher names: {len(invalid_teachers)}")
|
||
for teacher in invalid_teachers:
|
||
print(f" - {teacher}")
|
||
|
||
print()
|
||
|
||
def is_valid_teacher_name(name):
|
||
"""Check if a name looks like a valid teacher name"""
|
||
# Skip default names as they're intentionally different
|
||
if 'Default Teacher' in name or 'Неизвестный' in name:
|
||
return True # Considered valid as intentional placeholders
|
||
|
||
# Check for common invalid patterns
|
||
invalid_patterns = [
|
||
r'^\d+[А-ЯA-Z]$', # Class pattern like "8А", "11B"
|
||
r'^[А-ЯЁA-Z]\d+\s+[А-ЯЁA-Z]\d+$', # "E4 E5" pattern
|
||
r'.*[Gg]roup.*', # Group identifiers
|
||
r'.*[Tt]rack.*', # Track identifiers
|
||
r'^[А-ЯЁA-Z]\d+$', # Single group identifiers like "E4"
|
||
r'.*[Cc]lub.*', # Club identifiers
|
||
]
|
||
|
||
for pattern in invalid_patterns:
|
||
if re.match(pattern, name, re.IGNORECASE):
|
||
return False
|
||
|
||
# Valid teacher name patterns
|
||
valid_patterns = [
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+', # Russian names
|
||
r'^[A-Z][a-z]+\s+[A-Z][a-z]+', # English names
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.', # Name with initial
|
||
r'^[A-Z][a-z]+\s+[A-Z]\.', # Name with initial (English)
|
||
]
|
||
|
||
for pattern in valid_patterns:
|
||
if re.match(pattern, name):
|
||
return True
|
||
|
||
# If it's a reasonably long string with spaces and proper capitalization
|
||
parts = name.split()
|
||
if len(parts) >= 2 and len(name) >= 5:
|
||
# Check if parts start with capital letters
|
||
if all(len(part) > 0 and part[0].isupper() for part in parts):
|
||
return True
|
||
|
||
return False
|
||
|
||
def check_groups_table(cursor):
|
||
"""Check the groups table for data quality issues"""
|
||
print("Checking groups table...")
|
||
|
||
cursor.execute("SELECT COUNT(*) FROM groups")
|
||
total_count = cursor.fetchone()[0]
|
||
print(f"Total groups: {total_count}")
|
||
|
||
# Get all group names
|
||
cursor.execute("SELECT name FROM groups")
|
||
all_groups = cursor.fetchall()
|
||
|
||
# Check for potential student names in group names
|
||
potential_student_names = []
|
||
for (group_name,) in all_groups:
|
||
if looks_like_student_name(group_name):
|
||
potential_student_names.append(group_name)
|
||
|
||
print(f"Groups that look like student names: {len(potential_student_names)}")
|
||
for group in potential_student_names[:10]: # Show first 10
|
||
print(f" - {group}")
|
||
|
||
print()
|
||
|
||
def looks_like_student_name(name):
|
||
"""Check if a name looks like a student name instead of a group"""
|
||
# Class patterns like "8А", "11B" are OK as groups
|
||
class_pattern = r'^\d+[А-ЯA-Z]$'
|
||
if re.match(class_pattern, name):
|
||
return False
|
||
|
||
# Student names typically follow name patterns
|
||
name_pattern = r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+' # Russian name
|
||
if re.match(name_pattern, name):
|
||
return True
|
||
|
||
name_pattern = r'^[A-Z][a-z]+\s+[A-Z][a-z]+' # English name
|
||
if re.match(name_pattern, name):
|
||
return True
|
||
|
||
# If it contains common group identifiers, it's likely a valid group
|
||
group_indicators = ['club', 'track', 'group', 'module', '-', 'class']
|
||
if any(indicator in name.lower() for indicator in group_indicators):
|
||
return False
|
||
|
||
return False
|
||
|
||
def check_students_table(cursor):
|
||
"""Check the students table"""
|
||
print("Checking students table...")
|
||
|
||
cursor.execute("SELECT COUNT(*) FROM students")
|
||
total_count = cursor.fetchone()[0]
|
||
print(f"Total students: {total_count}")
|
||
|
||
# Get sample students
|
||
cursor.execute("SELECT full_name, class_name FROM students LIMIT 5")
|
||
samples = cursor.fetchall()
|
||
print("Sample students:")
|
||
for student in samples:
|
||
print(f" - {student[0]} (Class: {student[1]})")
|
||
|
||
print()
|
||
|
||
def check_schedule_integrity(cursor):
|
||
"""Check the schedule table for data consistency"""
|
||
print("Checking schedule table integrity...")
|
||
|
||
# Count total schedule entries
|
||
cursor.execute("SELECT COUNT(*) FROM schedule")
|
||
total_schedules = cursor.fetchone()[0]
|
||
print(f"Total schedule entries: {total_schedules}")
|
||
|
||
# Count entries with valid relationships
|
||
cursor.execute("""
|
||
SELECT COUNT(*)
|
||
FROM schedule s
|
||
JOIN students st ON s.student_id = st.student_id
|
||
JOIN subjects su ON s.subject_id = su.subject_id
|
||
JOIN teachers t ON s.teacher_id = t.teacher_id
|
||
JOIN groups g ON s.group_id = g.group_id
|
||
""")
|
||
valid_relationships = cursor.fetchone()[0]
|
||
print(f"Schedules with valid relationships: {valid_relationships}")
|
||
|
||
# Check for orphaned records
|
||
print("Checking for orphaned records...")
|
||
|
||
# Students in schedule but not in students table
|
||
cursor.execute("""
|
||
SELECT COUNT(*) FROM schedule s
|
||
LEFT JOIN students st ON s.student_id = st.student_id
|
||
WHERE st.student_id IS NULL
|
||
""")
|
||
orphaned_students = cursor.fetchone()[0]
|
||
print(f"Orphaned student references: {orphaned_students}")
|
||
|
||
# Subjects in schedule but not in subjects table
|
||
cursor.execute("""
|
||
SELECT COUNT(*) FROM schedule s
|
||
LEFT JOIN subjects su ON s.subject_id = su.subject_id
|
||
WHERE su.subject_id IS NULL
|
||
""")
|
||
orphaned_subjects = cursor.fetchone()[0]
|
||
print(f"Orphaned subject references: {orphaned_subjects}")
|
||
|
||
# Teachers in schedule but not in teachers table
|
||
cursor.execute("""
|
||
SELECT COUNT(*) FROM schedule s
|
||
LEFT JOIN teachers t ON s.teacher_id = t.teacher_id
|
||
WHERE t.teacher_id IS NULL
|
||
""")
|
||
orphaned_teachers = cursor.fetchone()[0]
|
||
print(f"Orphaned teacher references: {orphaned_teachers}")
|
||
|
||
# Groups in schedule but not in groups table
|
||
cursor.execute("""
|
||
SELECT COUNT(*) FROM schedule s
|
||
LEFT JOIN groups g ON s.group_id = g.group_id
|
||
WHERE g.group_id IS NULL
|
||
""")
|
||
orphaned_groups = cursor.fetchone()[0]
|
||
print(f"Orphaned group references: {orphaned_groups}")
|
||
|
||
print()
|
||
|
||
def main():
|
||
"""Main function to run all checks"""
|
||
print("School Schedule Database Verification")
|
||
print("="*40)
|
||
|
||
try:
|
||
conn, cursor = connect_db()
|
||
|
||
check_teachers_table(cursor)
|
||
check_groups_table(cursor)
|
||
check_students_table(cursor)
|
||
check_schedule_integrity(cursor)
|
||
|
||
conn.close()
|
||
print("Verification complete!")
|
||
|
||
except Exception as e:
|
||
print(f"Error during verification: {str(e)}")
|
||
|
||
if __name__ == "__main__":
|
||
main() |