#!/usr/bin/env python """ database.py - School schedule database (normalized version) Creates normalized tables and extracts from CSV with proper relationships """ import sqlite3 import csv import os import sys import re class SchoolScheduleDB: def __init__(self, db_name='school_schedule.db'): self.conn = sqlite3.connect(db_name) self.cursor = self.conn.cursor() # Initialize database tables self.create_tables() def normalize_class_name(self, class_name): """Normalize class names to handle Cyrillic/Latin character differences""" if not class_name: return class_name # Replace Cyrillic characters with Latin equivalents in class names # Specifically: replace Cyrillic А (U+0410) with Latin A (U+0041) normalized = class_name.replace('А', 'A').replace('В', 'B').replace('С', 'C') return normalized def create_tables(self): """Create normalized tables with proper relationships""" # Teachers table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS teachers ( teacher_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL, email TEXT, phone TEXT ) """) # Subjects table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS subjects ( subject_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL, description TEXT ) """) # Days table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS days ( day_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL -- e.g., Monday, Tuesday, etc. ) """) # Periods table - with proper unique constraint self.cursor.execute(""" CREATE TABLE IF NOT EXISTS periods ( period_id INTEGER PRIMARY KEY AUTOINCREMENT, period_number INTEGER, start_time TEXT, end_time TEXT, UNIQUE(period_number, start_time, end_time) ) """) # Groups table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS groups ( group_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL, description TEXT, class_name TEXT ) """) # Students table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS students ( student_id INTEGER PRIMARY KEY AUTOINCREMENT, class_name TEXT, full_name TEXT NOT NULL, UNIQUE(full_name, class_name) -- Prevent duplicate student entries ) """) # Homeroom teachers table self.cursor.execute(""" CREATE TABLE IF NOT EXISTS homeroom_teachers ( homeroom_id INTEGER PRIMARY KEY AUTOINCREMENT, class_name TEXT UNIQUE, teacher_name TEXT, classroom TEXT, parent_meeting_room TEXT, internal_number TEXT, mobile_number TEXT ) """) # Schedule table with foreign key relationships self.cursor.execute(""" CREATE TABLE IF NOT EXISTS schedule ( entry_id INTEGER PRIMARY KEY AUTOINCREMENT, student_id INTEGER, subject_id INTEGER, teacher_id INTEGER, day_id INTEGER, period_id INTEGER, group_id INTEGER, FOREIGN KEY (student_id) REFERENCES students(student_id), FOREIGN KEY (subject_id) REFERENCES subjects(subject_id), FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id), FOREIGN KEY (day_id) REFERENCES days(day_id), FOREIGN KEY (period_id) REFERENCES periods(period_id), FOREIGN KEY (group_id) REFERENCES groups(group_id) ) """) self.conn.commit() def populate_periods_table(self): """Populate the periods table with standard school periods""" period_times = { '1': ('09:00', '09:40'), '2': ('10:00', '10:40'), '3': ('11:00', '11:40'), '4': ('11:50', '12:30'), '5': ('12:40', '13:20'), '6': ('13:30', '14:10'), '7': ('14:20', '15:00'), '8': ('15:20', '16:00'), '9': ('16:15', '16:55'), '10': ('17:05', '17:45'), '11': ('17:55', '18:35'), '12': ('18:45', '19:20'), '13': ('19:20', '20:00') } for period_num, (start_time, end_time) in period_times.items(): self.cursor.execute( "INSERT OR IGNORE INTO periods (period_number, start_time, end_time) VALUES (?, ?, ?)", (int(period_num), start_time, end_time) ) # Add days of the week days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] for day in days_of_week: self.cursor.execute("INSERT OR IGNORE INTO days (name) VALUES (?)", (day,)) self.conn.commit() def update_database_from_csv(self, auto_update=False): """Update the database from CSV files in the sample_data directory""" # Define the sample_data directory path sample_data_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/sample_data" # Check if the directory exists if not os.path.exists(sample_data_dir): print(f"Directory {sample_data_dir} does not exist.") return # Find all CSV files in the sample_data directory csv_files = [f for f in os.listdir(sample_data_dir) if f.lower().endswith('.csv')] if not csv_files: print(f"No CSV files found in {sample_data_dir}") return print(f"Found {len(csv_files)} student data CSV file(s):") for i, file in enumerate(csv_files, 1): print(f" {i}. {file}") if auto_update: print("\nAuto-updating database with all student data CSV files...") print("\nUpdating database with {} file(s):".format(len(csv_files))) for file in csv_files: print(f" - {file}") csv_path = os.path.join(sample_data_dir, file) self.process_csv_with_teacher_mapping(csv_path) else: print("\nChoose an option:") print("1. Update all CSV files") print("2. Select specific CSV files") print("3. Cancel") choice = input("Enter your choice (1-3): ").strip() if choice == "1": print("\nUpdating database with all CSV files...") for file in csv_files: print(f"Processing {file}...") csv_path = os.path.join(sample_data_dir, file) self.process_csv_with_teacher_mapping(csv_path) elif choice == "2": print("\nAvailable CSV files:") for i, file in enumerate(csv_files, 1): print(f" {i}. {file}") selections = input("\nEnter the numbers of the files to process (comma-separated): ") try: indices = [int(x.strip()) - 1 for x in selections.split(',')] valid_indices = [i for i in indices if 0 <= i < len(csv_files)] for i in valid_indices: file = csv_files[i] print(f"Processing {file}...") csv_path = os.path.join(sample_data_dir, file) self.process_csv_with_teacher_mapping(csv_path) except ValueError: print("Invalid input. Please enter comma-separated numbers.") else: print("Update cancelled.") # Update teachers from the Teachers.csv file teachers_file = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/Teachers.csv" if os.path.exists(teachers_file): print("Processing teachers from Teachers.csv...") self.update_teachers_from_cheat_sheet(teachers_file) else: print("Teachers.csv file not found, skipping...") # Update homeroom teachers from the dedicated file homeroom_file = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/Homeroom_teachers.csv" if os.path.exists(homeroom_file): print("Processing homeroom teachers...") self.update_homeroom_teachers(homeroom_file) else: print("Homeroom teachers file not found, skipping...") print("Database updated successfully with selected CSV data.") def update_homeroom_teachers(self, homeroom_file): """Update homeroom teachers from the dedicated CSV file""" with open(homeroom_file, 'r', encoding='utf-8') as file: reader = csv.DictReader(file) for row in reader: # Normalize the class name to handle Cyrillic/Latin differences normalized_class = self.normalize_class_name(row['Class']) self.cursor.execute(""" INSERT OR REPLACE INTO homeroom_teachers (class_name, teacher_name, classroom, parent_meeting_room, internal_number, mobile_number) VALUES (?, ?, ?, ?, ?, ?) """, ( normalized_class, row['Homeroom Teacher'], row['Classroom'], row['Parent Meeting Room'], row['Internal Number'], row['Mobile Number'] )) self.conn.commit() print("Homeroom teachers updated successfully.") def process_csv_with_teacher_mapping(self, csv_file): """Process CSV with teacher-subject mapping based on positional order""" if not os.path.exists(csv_file): return False with open(csv_file, 'r', encoding='utf-8') as file: reader = csv.reader(file) rows = list(reader) # Identify header row - look for the row containing "ФИО" (full name) or similar indicators header_idx = None for i, row in enumerate(rows): for cell in row: if "ФИО" in str(cell) or "фио" in str(cell).lower() or "Ф.И.О." in str(cell) or "ф.и.о." in str(cell): header_idx = i break if header_idx is not None: break if header_idx is None: # Check if this file contains class and name columns that identify it as a student data file # Even if the header doesn't contain ФИО, we might still be able to identify student data has_class_indicators = any( any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) for row in rows[:min(len(rows), 10)] # Check first 10 rows ) has_name_indicators = any( any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']) for row in rows[:min(len(rows), 10)] # Check first 10 rows ) if has_class_indicators and has_name_indicators: # Try to find the header row by looking for class and name indicators for i, row in enumerate(rows): if any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) and \ any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']): header_idx = i break if header_idx is None: print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns") return False # Build a mapping of subject names in the header row header_row = rows[header_idx] header_subjects = {} for col_idx, subject_name in enumerate(header_row): subject_name = str(subject_name).strip() if (subject_name and subject_name.lower() not in ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкaфчика', 'локера'] and subject_name.strip() != "" and "ф.и.о" not in subject_name.lower() and "сортировка" not in subject_name.lower() and "номер" not in subject_name.lower() and "№" not in subject_name): header_subjects[col_idx] = subject_name # Map column index to subject name # IMPROVED TEACHER-SUBJECT MAPPING: Extract teacher-subject pairs from the first rows # Match base subjects to teachers and then map to header subjects base_subject_teacher_map = {} # Look through the first rows to find teacher-subject pairs for i in range(min(len(rows), header_idx)): # Only go up to header row current_row = rows[i] # Process the row in pairs of (subject, teacher, group_info) pattern j = 0 while j < len(current_row) - 1: subject_cell = current_row[j].strip() if j < len(current_row) else "" teacher_cell = current_row[j + 1].strip() if j + 1 < len(current_row) else "" group_cell = current_row[j + 2].strip() if j + 2 < len(current_row) else "" # Check if the first cell is a subject, the second is a teacher, and the third is a group if (subject_cell and self._is_likely_subject_name_simple(subject_cell) and teacher_cell and self._is_likely_teacher_name_enhanced(teacher_cell) and group_cell and self._is_likely_group_identifier(group_cell)): # Add to the base subject teacher map (if multiple teachers for same subject, store all) if subject_cell not in base_subject_teacher_map: base_subject_teacher_map[subject_cell] = [] if teacher_cell not in base_subject_teacher_map[subject_cell]: base_subject_teacher_map[subject_cell].append(teacher_cell) # Move to the next potential triplet (subject, teacher, group_info) j += 3 # Skip subject, teacher, and group info # Also check the row immediately before the header row for additional teacher-subject pairs if header_idx > 0: prev_row = rows[header_idx - 1] j = 0 while j < len(prev_row) - 1: subject_cell = prev_row[j].strip() if j < len(prev_row) else "" teacher_cell = prev_row[j + 1].strip() if j + 1 < len(prev_row) else "" group_cell = prev_row[j + 2].strip() if j + 2 < len(prev_row) else "" # Check if the first cell is a subject, the second is a teacher, and the third is a group if (subject_cell and self._is_likely_subject_name_simple(subject_cell) and teacher_cell and self._is_likely_teacher_name_enhanced(teacher_cell) and group_cell and self._is_likely_group_identifier(group_cell)): # Add to the base subject teacher map (if multiple teachers for same subject, store all) if subject_cell not in base_subject_teacher_map: base_subject_teacher_map[subject_cell] = [] if teacher_cell not in base_subject_teacher_map[subject_cell]: base_subject_teacher_map[subject_cell].append(teacher_cell) # Move to the next potential triplet (subject, teacher, group_info) j += 3 # Skip subject, teacher, and group info # Now map the header subjects to the teachers using base subject matching teacher_subject_map = {} for col_idx, header_subject in header_subjects.items(): # Find the base subject that corresponds to this header subject base_subject = self._find_base_subject(header_subject, base_subject_teacher_map.keys()) if base_subject and base_subject in base_subject_teacher_map: # Use the first teacher for this base subject teacher_subject_map[header_subject] = base_subject_teacher_map[base_subject][0] # Process each student row for student_row in rows[header_idx + 1:]: # Determine the structure dynamically based on the header class_col_idx = None name_col_idx = None # Find the index of the class column (usually called "Класс") for idx, header in enumerate(header_row): if "Класс" in str(header) or "класс" in str(header) or "Class" in str(header) or "class" in str(header).lower(): class_col_idx = idx break # Find the index of the name column (usually called "ФИО") for idx, header in enumerate(header_row): if "ФИО" in str(header) or "ф.и.о." in str(header).lower() or "name" in str(header).lower(): name_col_idx = idx break # If we couldn't find the columns properly, skip this row if class_col_idx is None or name_col_idx is None: continue # Check if this row has valid data in the expected columns if (len(student_row) > max(class_col_idx, name_col_idx) and student_row[class_col_idx].strip() and # class name exists student_row[name_col_idx].strip() and # student name exists self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)): name = student_row[name_col_idx].strip() # Name column class_name = student_row[class_col_idx].strip() # Class column # Normalize the class name to handle Cyrillic/Latin differences normalized_class = self.normalize_class_name(class_name) # Insert student into the database (using INSERT OR REPLACE to prevent duplicates) self.cursor.execute( "INSERT OR REPLACE INTO students (class_name, full_name) VALUES (?, ?)", (normalized_class, name) ) # Get the student_id for this student self.cursor.execute("SELECT student_id FROM students WHERE full_name = ? AND class_name = ?", (name, normalized_class)) student_id_result = self.cursor.fetchone() if student_id_result is None: continue student_id = student_id_result[0] # Process schedule data for this student # Go through each column to find subject and group info for col_idx, cell_value in enumerate(student_row): if cell_value and col_idx < len(header_row): # Get the subject from the header subject_header = header_row[col_idx] if col_idx < len(header_row) else "" # Skip columns that don't contain schedule information if (col_idx == 0 or col_idx == 1 or col_idx == 2 or col_idx == class_col_idx or col_idx == name_col_idx or # skip metadata cols "сортировка" in subject_header.lower() or "номер" in subject_header.lower() or "шкaфчика" in subject_header.lower() or "локера" in subject_header.lower()): continue # Extract group information from the cell group_assignment = cell_value.strip() if group_assignment and group_assignment.lower() != "nan" and group_assignment != "-" and group_assignment != "": # Find the teacher associated with this subject subject_name = str(subject_header).strip() teacher_name = teacher_subject_map.get(subject_name, f"Default Teacher for {subject_name}") # Insert the entities into their respective tables first # Then get their IDs to create the schedule entry self._process_schedule_entry_with_teacher_mapping( student_id, group_assignment, subject_name, teacher_name ) self.conn.commit() return True def _find_base_subject(self, header_subject, base_subjects): """Find the base subject that corresponds to a header subject""" header_lower = header_subject.lower() # Check for direct matches first for base_subject in base_subjects: if base_subject.lower() in header_lower or header_lower in base_subject.lower(): return base_subject # Check for partial matches with common patterns for base_subject in base_subjects: # Remove common suffixes from header subject and try to match simplified_header = header_lower.replace(" 1 модуль", "").replace(" 2 модуль", "") \ .replace(" 1,2 модуль", "").replace(" 1 мод.", "").replace(" 2 мод.", "") \ .replace(" / ", " ").replace(" ", " ") simplified_base = base_subject.lower().replace(" / ", " ").replace(" ", " ") if simplified_base in simplified_header or simplified_header in simplified_base: return base_subject return None def _is_likely_subject_name_simple(self, text): """Simple check if the text is likely a subject name""" if not text or len(text.strip()) < 2: return False text = text.strip().lower() # Common subject indicators in Russian and English subject_indicators = [ 'технотрек', 'матем', 'информ', 'англ', 'русск', 'физика', 'химия', 'биол', 'история', 'общество', 'география', 'литер', 'физкульт', 'лидерство', 'спорт. клуб', 'орксэ', 'китайск', 'немецк', 'француз', 'speaking club', 'maths', 'ict', 'geography', 'physics', 'robotics', 'culinary', 'science', 'ai core', 'vr/ar', 'cybersafety', 'business', 'design', 'prototype', 'mediacom', 'robotics track', 'culinary track', 'science track', 'ai core track', 'vr/ar track', 'cybersafety track', 'programming', 'algorithm', 'logic', 'pe', 'sports', 'swimming', 'fitness', 'gymnastics', 'climbing', 'games', 'art', 'music', 'dance', 'karate', 'judo', 'chess', 'leadership', 'алгоритмика', 'робототехника', 'программирование', 'математика', 'информатика', 'орксэ', 'английский', 'русский', 'физическая культура', 'орксэ', 'изо', 'алгебра', 'геометрия', 'астрономия', 'экология', 'астрономия', 'иностранный', 'ит', 'computer science', 'informatics' ] # Check if text contains any of the subject indicators for indicator in subject_indicators: if indicator in text: return True return False def _is_likely_subject_name(self, text): """Check if the text is likely a subject name""" if not text or len(text.strip()) < 2: return False text = text.strip() # Common subject indicators in Russian and English subject_indicators = [ 'Матем.', 'Информ.', 'Англ.яз', 'Русск.яз', 'Физика', 'Химия', 'Биол', 'История', 'Общество', 'География', 'Литер', 'Физкульт', 'Технотрек', 'Лидерство', 'Спорт. клуб', 'ОРКСЭ', 'Китайск', 'Немецк', 'Француз', 'Speaking club', 'Maths', 'ICT', 'Geography', 'Physics', 'Robotics', 'Culinary', 'Science', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Science', 'Robotics', 'Culinary', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Robotics Track', 'Culinary Track', 'Science Track', 'AI Core Track', 'VR/AR Track', 'CyberSafety Track', 'Business Track', 'Design Track', 'Prototype Track', 'MediaCom Track', 'Math', 'Algebra', 'Geometry', 'Calculus', 'Statistics', 'Coding', 'Programming', 'Algorithm', 'Logic', 'Robotics', 'Physical Education', 'PE', 'Sports', 'Swimming', 'Fitness', 'Gymnastics', 'Climbing', 'Games', 'Art', 'Music', 'Dance', 'Karate', 'Judo', 'Martial Arts', 'Chess', 'Leadership', 'Entrepreneurship', 'Технотрек 1 модуль', 'Технотрек 2 модуль', 'ОРКСЭ 1,2 модуль', 'Математика 1 модуль', 'Математика 2 модуль', 'Программирование', 'Алгоритмика и логика', 'Лидерство', 'Робототехника', 'Physical Education 1,2 модуль', 'Английский 1 модуль', 'Английский 2 модуль', 'Англ.яз', 'Русск.яз', 'Информ.', 'Матем.', 'Физика', 'Химия', 'Биология', 'История', 'Обществознание', 'География', 'Литература', 'Физическая культура', 'ОРКСЭ', 'ИЗО', 'Китайский', 'Немецкий', 'Французский', 'Алгебра', 'Геометрия', 'Астрономия', 'Экология' ] # Check if text matches any of the subject indicators for indicator in subject_indicators: if indicator.lower() in text.lower(): return True # Check if the text contains common subject-related keywords common_keywords = ['модуль', 'track', 'club', 'group', 'class', 'lesson', 'subject', 'module', 'яз', 'язык'] for keyword in common_keywords: if keyword in text.lower(): return True # Check if text contains specific patterns that indicate it's a subject subject_patterns = [ r'.*[Tt]rack.*', # Track identifiers r'.*[Mm]odule.*', # Module identifiers r'.*[Cc]lub.*', # Club identifiers r'.*[Ss]ubject.*', # Subject identifiers r'.*[Cc]lass.*', # Class identifiers r'.*[Ll]esson.*', # Lesson identifiers ] for pattern in subject_patterns: if re.search(pattern, text): return True return False def _is_valid_student_record_by_cols(self, row, class_col_idx, name_col_idx): """Check if a row represents a valid student record based on specific columns""" # A valid student record should have: # - Non-empty class name in the class column # - Non-empty student name in the name column if len(row) <= max(class_col_idx, name_col_idx): return False class_name = row[class_col_idx].strip() if len(row) > class_col_idx else "" student_name = row[name_col_idx].strip() if len(row) > name_col_idx else "" # Check if the class name looks like an actual class (contains a number followed by a letter) class_pattern = r'^\d+[А-ЯA-Z]$' # e.g., 6А, 11А, 4B if re.match(class_pattern, class_name): return bool(student_name and student_name != class_name) # Ensure name exists and is different from class # If not matching class pattern, check if the name field is not just another class-like value name_pattern = r'^\d+[А-ЯA-Z]$' # This would indicate it's probably a class, not a name if re.match(name_pattern, student_name): return False # This row has a class in the name field, so not valid return bool(class_name and student_name and class_name != student_name) def _process_schedule_entry_with_teacher_mapping(self, student_id, group_info, subject_info, teacher_name): """Process individual schedule entries with explicit teacher mapping and insert into normalized tables""" # Clean up the inputs subject_name = subject_info.strip() if subject_info.strip() else "General Class" group_assignment = group_info.strip() # Only proceed if we have valid data if subject_name and group_assignment and group_assignment.lower() != "nan" and group_assignment != "-" and group_assignment != "": # Insert subject if not exists and get its ID self.cursor.execute("INSERT OR IGNORE INTO subjects (name) VALUES (?)", (subject_name,)) self.cursor.execute("SELECT subject_id FROM subjects WHERE name = ?", (subject_name,)) subject_id = self.cursor.fetchone()[0] # Insert teacher if not exists and get its ID # Use the teacher name as is, without default creation if not found self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (teacher_name,)) self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (teacher_name,)) teacher_result = self.cursor.fetchone() if teacher_result: teacher_id = teacher_result[0] else: # Fallback to a default teacher if the extracted name is invalid default_teacher = "Неизвестный преподаватель" self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (default_teacher,)) self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (default_teacher,)) teacher_id = self.cursor.fetchone()[0] # Use a default day for now (in a real system, we'd extract this from the schedule) # For now, we'll randomly assign to a day of the week import random days_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"] selected_day = random.choice(days_list) self.cursor.execute("INSERT OR IGNORE INTO days (name) VALUES (?)", (selected_day,)) self.cursor.execute("SELECT day_id FROM days WHERE name = ?", (selected_day,)) day_id = self.cursor.fetchone()[0] # Use a default period - for now we'll use period 1, but in a real system # we would need to extract this from the CSV if available self.cursor.execute("SELECT period_id FROM periods WHERE period_number = 1 LIMIT 1") period_result = self.cursor.fetchone() if period_result: period_id = period_result[0] else: # Fallback if no periods were inserted self.cursor.execute("SELECT period_id FROM periods LIMIT 1") period_id = self.cursor.fetchone()[0] # Clean the group name to separate it from student data group_name = self._clean_group_name(group_assignment) self.cursor.execute("INSERT OR IGNORE INTO groups (name) VALUES (?)", (group_name,)) self.cursor.execute("SELECT group_id FROM groups WHERE name = ?", (group_name,)) group_id = self.cursor.fetchone()[0] # Insert the schedule entry self.cursor.execute(""" INSERT OR IGNORE INTO schedule (student_id, subject_id, teacher_id, day_id, period_id, group_id) VALUES (?, ?, ?, ?, ?, ?) """, (student_id, subject_id, teacher_id, day_id, period_id, group_id)) def _clean_group_name(self, raw_group_data): """Extract clean group name from potentially mixed student/group data""" # Remove potential student names from the group data # Group names typically contain numbers, class identifiers, or specific activity names cleaned = raw_group_data.strip() # If the group data looks like it contains a student name pattern, # we'll try to extract just the group identifier part if re.match(r'^\d+[А-ЯA-Z]', cleaned): # This looks like a class designation, return as is return cleaned # If the group data contains common group indicators, return as is group_indicators = ['кл', 'class', 'club', 'track', 'group', 'module', '-'] if any(indicator in cleaned.lower() for indicator in group_indicators): return cleaned # If the group data looks like a subject-identifier pattern, return as is subject_indicators = ['ICT', 'English', 'Math', 'Physics', 'Chemistry', 'Biology', 'Science'] if any(indicator in cleaned for indicator in subject_indicators): return cleaned # If none of the above conditions match, return a generic group name return f"Group_{hash(cleaned) % 10000}" def _is_likely_teacher_name(self, text): """Check if the text is likely to be a teacher name""" if not text or len(text.strip()) < 5: # Require minimum length for a name return False text = text.strip() # Common non-name values that appear in the CSV common_non_names = ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '', 'Е4 Е5', 'E4 E5', 'группа', 'group', 'каб.', 'гр.', 'фитнес', 'каб', 'все группы', '1 группа', '2 группа', 'Е1', 'Е2', 'Е3', 'Е4', 'Е5', 'Е6', 'Е1 Е2', 'Е4 Е5', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'гр 1', 'гр 2'] if text.lower() in common_non_names: return False # Exclusion patterns for non-teacher entries exclusion_patterns = [ r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$', # E4 E5 pattern r'^[A-Z]\d+\s+[A-Z]\d+$', # English groups r'.*[Tt]rack.*', # Track identifiers r'.*[Gg]roup.*', # Group identifiers r'.*\d+[А-ЯA-Z]\d*$', # Number-letter combos r'^[А-ЯЁA-Z].*\d+', # Text ending with digits r'.*[Cc]lub.*', # Club identifiers r'.*[Rr]oom.*', # Room identifiers r'.*[Cc]lass.*', # Class identifiers r'.*[Pp]eriod.*', # Period identifiers r'^\d+$', # Just numbers r'^[А-ЯЁA-Z]*$', # All caps words r'^[А-ЯЁA-Z\s\d]+$', # Caps words and numbers (likely room numbers) r'^[ЕеEe][\d\s,]+$' # Room identifiers like E1, E2, etc. ] for pattern in exclusion_patterns: if re.match(pattern, text, re.IGNORECASE): return False # Positive patterns for teacher names teacher_patterns = [ r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.\s*[А-ЯЁ]\.$', # Иванов А.А. r'^[А-ЯЁ]\.\s*[А-ЯЁ]\.\s+[А-ЯЁ][а-яё]+$', # А.А. Иванов r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$', # Full name r'^[A-Z][a-z]+\s+[A-Z][a-z]+$', # John Smith r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$', # Smith J.J. r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$', # Russian names without patronymic r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$', # Initials format r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+', # Names without periods ] for pattern in teacher_patterns: if re.match(pattern, text.strip()): return True # Additional check: if it looks like a proper name (with capital letters and min length) # and doesn't match exclusion patterns name_parts = text.split() if len(name_parts) >= 2: # At least two parts (first name + last name) # Check if they start with capital letters if all(part[0].isupper() for part in name_parts if len(part) > 1): # Additional check: make sure it's not just a title or other text common_titles = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Teacher', 'Instructor', 'Coach'] if any(title in text for title in common_titles): return False return True return False def _is_likely_subject_label(self, text): """Check if text is likely a subject label like 'Матем.', 'Информ.', 'Англ.яз', etc.""" if not text or len(text) < 2: return False # Common Russian abbreviations for subjects subject_patterns = [ 'Матем.', 'Информ.', 'Англ.яз', 'Русск.яз', 'Физика', 'Химия', 'Биол', 'История', 'Общество', 'География', 'Литер', 'Физкульт', 'Технотрек', 'Лидерство', 'Спорт. клуб', 'ОРКСЭ', 'Китайск', 'Немецк', 'Француз', 'Speaking club', 'Maths', 'ICT', 'Geography', 'Physics', 'Robotics', 'Culinary', 'Science', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Science', 'Robotics', 'Culinary', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Robotics Track', 'Culinary Track', 'Science Track', 'AI Core Track', 'VR/AR Track', 'CyberSafety Track', 'Business Track', 'Design Track', 'Prototype Track', 'MediaCom Track', 'Math', 'Algebra', 'Geometry', 'Calculus', 'Statistics', 'Coding', 'Programming', 'Algorithm', 'Logic', 'Robotics', 'Physical Education', 'PE', 'Sports', 'Swimming', 'Fitness', 'Gymnastics', 'Climbing', 'Games', 'Art', 'Music', 'Dance', 'Karate', 'Judo', 'Martial Arts', 'Chess', 'Leadership', 'Entrepreneurship' ] text_clean = text.strip().lower() for pattern in subject_patterns: if pattern.lower() in text_clean: return True # Also check for specific subject names found in the data specific_subjects = ['матем.', 'информ.', 'англ.яз', 'русск.яз', 'каб.', 'business', 'maths', 'speaking', 'ict', 'geography', 'physics', 'robotics', 'science', 'ai core', 'vr/ar', 'cybersafety', 'design', 'prototype', 'mediacom', 'culinary', 'physical education', 'pe', 'sports', 'swimming', 'fitness', 'gymnastics', 'climbing', 'games', 'art', 'music', 'dance', 'karate', 'chess', 'leadership'] for subj in specific_subjects: if subj in text_clean: return True return False def _find_matching_subject_in_header_from_list(self, subject_label, header_subjects, header_row): """Find the matching full subject name in the header based on the label""" if not subject_label: return None # Look for the best match in the header subjects subject_label_lower = subject_label.lower().replace('.', '').replace('яз', 'язык') # Direct match first for col_idx, full_subj in header_subjects: if subject_label_lower in full_subj.lower() or full_subj.lower() in subject_label_lower: return full_subj # If no direct match, try to find by partial matching in the whole header row for i, header_item in enumerate(header_row): if subject_label_lower in str(header_item).lower() or str(header_item).lower() in subject_label_lower: return str(header_item).strip() # Try more general matching - if label contains common abbreviations for col_idx, full_subj in header_subjects: full_lower = full_subj.lower() if ('матем' in subject_label_lower and 'матем' in full_lower) or \ ('информ' in subject_label_lower and 'информ' in full_lower) or \ ('англ' in subject_label_lower and 'англ' in full_lower) or \ ('русск' in subject_label_lower and 'русск' in full_lower) or \ ('физик' in subject_label_lower and 'физик' in full_lower) or \ ('хим' in subject_label_lower and 'хим' in full_lower) or \ ('биол' in subject_label_lower and 'биол' in full_lower) or \ ('истор' in subject_label_lower and 'истор' in full_lower) or \ ('общ' in subject_label_lower and 'общ' in full_lower) or \ ('географ' in subject_label_lower and 'географ' in full_lower): return full_subj return None def find_student(self, name_query): """Search for students by name""" self.cursor.execute(""" SELECT s.full_name, s.class_name FROM students s WHERE s.full_name LIKE ? LIMIT 10 """, (f'%{name_query}%',)) return self.cursor.fetchall() def get_current_class(self, student_name, current_day, current_time): """Find student's current class""" self.cursor.execute(""" SELECT sub.name, t.name, p.start_time, p.end_time FROM schedule sch JOIN students s ON sch.student_id = s.student_id JOIN subjects sub ON sch.subject_id = sub.subject_id JOIN teachers t ON sch.teacher_id = t.teacher_id JOIN days d ON sch.day_id = d.day_id JOIN periods p ON sch.period_id = p.period_id JOIN groups g ON sch.group_id = g.group_id WHERE s.full_name = ? AND d.name = ? AND p.start_time <= ? AND p.end_time >= ? """, (student_name, current_day, current_time, current_time)) return self.cursor.fetchone() def get_student_schedule(self, student_name): """Get full schedule for a student""" self.cursor.execute(""" SELECT sub.name, t.name, p.start_time, p.end_time, g.name FROM schedule sch JOIN students s ON sch.student_id = s.student_id JOIN subjects sub ON sch.subject_id = sub.subject_id JOIN teachers t ON sch.teacher_id = t.teacher_id JOIN periods p ON sch.period_id = p.period_id JOIN groups g ON sch.group_id = g.group_id WHERE s.full_name = ? ORDER BY p.period_number """, (student_name,)) return self.cursor.fetchall() def _is_likely_teacher_name_enhanced(self, text): """Enhanced check if the text is likely to be a teacher name""" if not text or len(text.strip()) < 5: # Require minimum length for a name return False text = text.strip() # Common non-name values that appear in the CSV common_non_names = ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '', 'Е4 Е5', 'E4 E5', 'группа', 'group', 'каб.', 'гр.', 'фитнес', 'каб', 'все группы', '1 группа', '2 группа', 'Е1', 'Е2', 'Е3', 'Е4', 'Е5', 'Е6', 'Е1 Е2', 'Е4 Е5', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'гр 1', 'гр 2'] if text.lower() in common_non_names: return False # Exclusion patterns for non-teacher entries exclusion_patterns = [ r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$', # E4 E5 pattern r'^[A-Z]\d+\s+[A-Z]\d+$', # English groups r'.*[Tt]rack.*', # Track identifiers r'.*[Gg]roup.*', # Group identifiers r'.*\d+[А-ЯA-Z]\d*$', # Number-letter combos r'^[А-ЯЁA-Z].*\d+', # Text ending with digits r'.*[Cc]lub.*', # Club identifiers r'.*[Rr]oom.*', # Room identifiers r'.*[Cc]lass.*', # Class identifiers r'.*[Pp]eriod.*', # Period identifiers r'^\d+$', # Just numbers r'^[А-ЯЁA-Z]*$', # All caps words r'^[А-ЯЁA-Z\s\d]+$', # Caps words and numbers (likely room numbers) r'^[ЕеEe][\d\s,]+$', # Room identifiers like E1, E2, etc. ] for pattern in exclusion_patterns: if re.match(pattern, text, re.IGNORECASE): return False # Check if it looks like a name with multiple capitalized words (Russian or English) # Teacher names typically have 2-4 words with capitalized first letters words = text.split() # Handle compound names like "Name1 / Name2" if '/' in text: parts = [part.strip() for part in text.split('/')] for part in parts: if self._is_likely_teacher_name_simple(part): return True if len(words) < 2 or len(words) > 4: return False # Check if most words start with capital letters (allowing for exceptions like "van", "de", etc.) capital_words = 0 for word in words: # Skip common particles that are lowercase in names if word in ['van', 'von', 'de', 'di', 'le', 'la', 'du', 'del', 'da', 'и', 'на', 'де']: capital_words += 1 elif word[0].isupper() and len(word) > 1: capital_words += 1 # At least n-1 words should be capitalized (for n-word names) if capital_words < len(words) - 1: return False # Additional check: if it looks like a proper name (with capital letters and min length) # and doesn't match exclusion patterns name_parts = text.split() if len(name_parts) >= 2: # At least two parts (first name + last name) # Check if they start with capital letters if all(part[0].isupper() for part in name_parts if len(part) > 1): # Additional check: make sure it's not just a title or other text common_titles = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Teacher', 'Instructor', 'Coach'] if any(title in text for title in common_titles): return False return True return False def _is_likely_group_identifier(self, text): """Check if text is likely a group identifier like 'E1', 'E2', 'гр 1', etc.""" if not text: return False text = text.strip() # Common group identifiers group_patterns = [ r'^[Ee]\d+', # E1, E2, etc. r'^[Ee]\d+\s*[Ee]\d+', # E1 E2, E4 E5, etc. r'^(гр|group|группа).*', # "гр 1", "group 1", etc. r'^[А-ЯA-Z]\d+', # A1, B2, etc. r'^[А-ЯA-Z]\d+\s+[А-ЯA-Z]\d+', # A1 B2, etc. r'^(все группы|all groups).*', # "все группы", etc. r'^\d+\s*(группа|class).*', # "1 группа", etc. r'^(1|2)\s*(группа|group)', # "1 группа", "2 group", etc. ] for pattern in group_patterns: if re.match(pattern, text, re.IGNORECASE): return True # Additional common group indicators common_groups = ['E1 E2', 'E3 E4', 'E5 E6', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6', '1 группа', '2 группа', 'все группы', 'гр 1', 'гр 2', 'all groups', 'group 1', 'group 2', 'A1', 'B1', 'C1', '4A', '4B', '4C', '4ABC'] return text in common_groups def update_teachers_from_cheat_sheet(self, teachers_csv_path): """Update the teachers table from the Teachers.csv file""" import csv import re with open(teachers_csv_path, 'r', encoding='utf-8') as file: reader = csv.reader(file) rows = list(reader) # Set to store unique teacher names unique_teachers = set() # Process the CSV to extract teacher names # Teacher names appear in positions after subjects and before group info for row in rows: # Process the row in triplets (subject, teacher, group_info) pattern j = 0 while j < len(row) - 2: # Look for a subject-like cell followed by a teacher-like cell subject_cell = str(row[j]).strip() if j < len(row) else "" teacher_cell = str(row[j + 1]).strip() if j + 1 < len(row) else "" group_cell = str(row[j + 2]).strip() if j + 2 < len(row) else "" # Check if middle cell is likely a teacher name (between subject and group) if (subject_cell and self._is_likely_subject_name_simple(subject_cell) and teacher_cell and self._is_likely_teacher_name_enhanced(teacher_cell) and group_cell and self._is_likely_group_identifier(group_cell)): # Handle composite teacher names like "Name1 / Name2" if '/' in teacher_cell: parts = [part.strip() for part in teacher_cell.split('/')] for part in parts: if self._is_likely_teacher_name_enhanced(part): unique_teachers.add(part) else: unique_teachers.add(teacher_cell) # Also check if the current cell itself might be a teacher name current_cell = str(row[j]).strip() if current_cell and self._is_likely_teacher_name_enhanced(current_cell): unique_teachers.add(current_cell) j += 1 # Move to next cell # Special handling for teacher names that appear in combination patterns like "First Last / Other Teacher" for row in rows: for cell in row: cell_str = str(cell).strip() if '/' in cell_str: parts = [part.strip() for part in cell_str.split('/')] for part in parts: part = part.strip() if self._is_likely_teacher_name_enhanced(part): unique_teachers.add(part) # Insert unique teachers into the database for teacher_name in unique_teachers: if teacher_name and len(teacher_name) > 2: # Make sure it's not an empty string or very short self.cursor.execute( "INSERT OR IGNORE INTO teachers (name, email, phone) VALUES (?, NULL, NULL)", (teacher_name,) ) print(f"Added {len(unique_teachers)} unique teachers from Teachers.csv") self.conn.commit() # Main execution - just setup database if __name__ == "__main__": db = SchoolScheduleDB() # Check if auto-update flag is passed as argument auto_update = len(sys.argv) > 1 and sys.argv[1] == '--auto' db.update_database_from_csv(auto_update=auto_update) db.close()