This commit is contained in:
2026-02-05 10:15:09 +03:00
parent 2427fce842
commit 67241a5ed0
33 changed files with 13147 additions and 154 deletions

232
scheduler_bots/verify_db.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python
"""
verify_db.py - Verification script for the school schedule database
Checks data quality in teachers, groups, and students tables
"""
import sqlite3
import re
def connect_db(db_name='school_schedule.db'):
"""Connect to the database"""
conn = sqlite3.connect(db_name)
cursor = conn.cursor()
return conn, cursor
def check_teachers_table(cursor):
"""Check the teachers table for data quality issues"""
print("Checking teachers table...")
cursor.execute("SELECT COUNT(*) FROM teachers")
total_count = cursor.fetchone()[0]
print(f"Total teachers: {total_count}")
# Find teachers with default names
cursor.execute("SELECT name FROM teachers WHERE name LIKE '%Default Teacher%' OR name LIKE '%Неизвестный%'")
default_teachers = cursor.fetchall()
print(f"Teachers with default names: {len(default_teachers)}")
for teacher in default_teachers:
print(f" - {teacher[0]}")
# Find potentially invalid teacher names
invalid_teachers = []
cursor.execute("SELECT name FROM teachers")
all_teachers = cursor.fetchall()
for (teacher_name,) in all_teachers:
if not is_valid_teacher_name(teacher_name):
invalid_teachers.append(teacher_name)
print(f"Potentially invalid teacher names: {len(invalid_teachers)}")
for teacher in invalid_teachers:
print(f" - {teacher}")
print()
def is_valid_teacher_name(name):
"""Check if a name looks like a valid teacher name"""
# Skip default names as they're intentionally different
if 'Default Teacher' in name or 'Неизвестный' in name:
return True # Considered valid as intentional placeholders
# Check for common invalid patterns
invalid_patterns = [
r'^\d+[А-ЯA-Z]$', # Class pattern like "8А", "11B"
r'^[А-ЯЁA-Z]\d+\s+[А-ЯЁA-Z]\d+$', # "E4 E5" pattern
r'.*[Gg]roup.*', # Group identifiers
r'.*[Tt]rack.*', # Track identifiers
r'^[А-ЯЁA-Z]\d+$', # Single group identifiers like "E4"
r'.*[Cc]lub.*', # Club identifiers
]
for pattern in invalid_patterns:
if re.match(pattern, name, re.IGNORECASE):
return False
# Valid teacher name patterns
valid_patterns = [
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+', # Russian names
r'^[A-Z][a-z]+\s+[A-Z][a-z]+', # English names
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.', # Name with initial
r'^[A-Z][a-z]+\s+[A-Z]\.', # Name with initial (English)
]
for pattern in valid_patterns:
if re.match(pattern, name):
return True
# If it's a reasonably long string with spaces and proper capitalization
parts = name.split()
if len(parts) >= 2 and len(name) >= 5:
# Check if parts start with capital letters
if all(len(part) > 0 and part[0].isupper() for part in parts):
return True
return False
def check_groups_table(cursor):
"""Check the groups table for data quality issues"""
print("Checking groups table...")
cursor.execute("SELECT COUNT(*) FROM groups")
total_count = cursor.fetchone()[0]
print(f"Total groups: {total_count}")
# Get all group names
cursor.execute("SELECT name FROM groups")
all_groups = cursor.fetchall()
# Check for potential student names in group names
potential_student_names = []
for (group_name,) in all_groups:
if looks_like_student_name(group_name):
potential_student_names.append(group_name)
print(f"Groups that look like student names: {len(potential_student_names)}")
for group in potential_student_names[:10]: # Show first 10
print(f" - {group}")
print()
def looks_like_student_name(name):
"""Check if a name looks like a student name instead of a group"""
# Class patterns like "8А", "11B" are OK as groups
class_pattern = r'^\d+[А-ЯA-Z]$'
if re.match(class_pattern, name):
return False
# Student names typically follow name patterns
name_pattern = r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+' # Russian name
if re.match(name_pattern, name):
return True
name_pattern = r'^[A-Z][a-z]+\s+[A-Z][a-z]+' # English name
if re.match(name_pattern, name):
return True
# If it contains common group identifiers, it's likely a valid group
group_indicators = ['club', 'track', 'group', 'module', '-', 'class']
if any(indicator in name.lower() for indicator in group_indicators):
return False
return False
def check_students_table(cursor):
"""Check the students table"""
print("Checking students table...")
cursor.execute("SELECT COUNT(*) FROM students")
total_count = cursor.fetchone()[0]
print(f"Total students: {total_count}")
# Get sample students
cursor.execute("SELECT full_name, class_name FROM students LIMIT 5")
samples = cursor.fetchall()
print("Sample students:")
for student in samples:
print(f" - {student[0]} (Class: {student[1]})")
print()
def check_schedule_integrity(cursor):
"""Check the schedule table for data consistency"""
print("Checking schedule table integrity...")
# Count total schedule entries
cursor.execute("SELECT COUNT(*) FROM schedule")
total_schedules = cursor.fetchone()[0]
print(f"Total schedule entries: {total_schedules}")
# Count entries with valid relationships
cursor.execute("""
SELECT COUNT(*)
FROM schedule s
JOIN students st ON s.student_id = st.student_id
JOIN subjects su ON s.subject_id = su.subject_id
JOIN teachers t ON s.teacher_id = t.teacher_id
JOIN groups g ON s.group_id = g.group_id
""")
valid_relationships = cursor.fetchone()[0]
print(f"Schedules with valid relationships: {valid_relationships}")
# Check for orphaned records
print("Checking for orphaned records...")
# Students in schedule but not in students table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN students st ON s.student_id = st.student_id
WHERE st.student_id IS NULL
""")
orphaned_students = cursor.fetchone()[0]
print(f"Orphaned student references: {orphaned_students}")
# Subjects in schedule but not in subjects table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN subjects su ON s.subject_id = su.subject_id
WHERE su.subject_id IS NULL
""")
orphaned_subjects = cursor.fetchone()[0]
print(f"Orphaned subject references: {orphaned_subjects}")
# Teachers in schedule but not in teachers table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN teachers t ON s.teacher_id = t.teacher_id
WHERE t.teacher_id IS NULL
""")
orphaned_teachers = cursor.fetchone()[0]
print(f"Orphaned teacher references: {orphaned_teachers}")
# Groups in schedule but not in groups table
cursor.execute("""
SELECT COUNT(*) FROM schedule s
LEFT JOIN groups g ON s.group_id = g.group_id
WHERE g.group_id IS NULL
""")
orphaned_groups = cursor.fetchone()[0]
print(f"Orphaned group references: {orphaned_groups}")
print()
def main():
"""Main function to run all checks"""
print("School Schedule Database Verification")
print("="*40)
try:
conn, cursor = connect_db()
check_teachers_table(cursor)
check_groups_table(cursor)
check_students_table(cursor)
check_schedule_integrity(cursor)
conn.close()
print("Verification complete!")
except Exception as e:
print(f"Error during verification: {str(e)}")
if __name__ == "__main__":
main()