Added

2026-02-05 10:15:09 +03:00
parent 2427fce842
commit 67241a5ed0
33 changed files with 13147 additions and 154 deletions
--- a/scheduler_bots/database_fresh.py
+++ b/scheduler_bots/database_fresh.py
@@ -0,0 +1,721 @@
+#!/usr/bin/env python
+"""
+database_fresh.py - School schedule database (normalized version)
+Creates normalized tables and populates them from CSV files, linking records through foreign keys
+"""
+
+import sqlite3
+import csv
+import os
+import sys
+import re
+
+class SchoolScheduleDB:
+    def __init__(self, db_name='school_schedule.db'):
+        """Open (or create) the SQLite database and ensure the schema exists.
+
+        Args:
+            db_name: Database path handed to ``sqlite3.connect``.
+        """
+        connection = sqlite3.connect(db_name)
+        self.conn = connection
+        self.cursor = connection.cursor()
+        # Every CREATE statement uses IF NOT EXISTS, so this is safe on an existing database
+        self.create_tables()
+    
+    def create_tables(self):
+        """Create the normalized schema if it does not exist yet.
+
+        Tables: ``teachers``, ``subjects``, ``days``, ``periods``, ``groups``,
+        ``students`` and the ``schedule`` fact table that references all of
+        them through foreign keys.  Every statement uses
+        ``CREATE TABLE IF NOT EXISTS``, so calling this repeatedly is safe and
+        an already existing table is left untouched.
+
+        The ``students`` table declares ``UNIQUE(class_name, full_name)``: the
+        CSV importer inserts students with ``INSERT OR IGNORE``, which only
+        skips a row when a uniqueness constraint is violated.  Without the
+        constraint every import appended duplicate student rows.
+        """
+        # Teachers table
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS teachers (
+                teacher_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                email TEXT,
+                phone TEXT
+            )
+        """)
+
+        # Subjects table
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS subjects (
+                subject_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                description TEXT
+            )
+        """)
+
+        # Days table
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS days (
+                day_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL  -- e.g., Monday, Tuesday, etc.
+            )
+        """)
+
+        # Periods table - one row per (number, start, end) combination
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS periods (
+                period_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                period_number INTEGER,
+                start_time TEXT,
+                end_time TEXT,
+                UNIQUE(period_number, start_time, end_time)
+            )
+        """)
+
+        # Groups table
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS groups (
+                group_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                description TEXT,
+                class_name TEXT
+            )
+        """)
+
+        # Students table - a student is identified by class + full name
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS students (
+                student_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                class_name TEXT,
+                full_name TEXT NOT NULL,
+                UNIQUE(class_name, full_name)
+            )
+        """)
+
+        # Schedule table with foreign key relationships
+        self.cursor.execute("""
+            CREATE TABLE IF NOT EXISTS schedule (
+                entry_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                student_id INTEGER,
+                subject_id INTEGER,
+                teacher_id INTEGER,
+                day_id INTEGER,
+                period_id INTEGER,
+                group_id INTEGER,
+                FOREIGN KEY (student_id) REFERENCES students(student_id),
+                FOREIGN KEY (subject_id) REFERENCES subjects(subject_id),
+                FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id),
+                FOREIGN KEY (day_id) REFERENCES days(day_id),
+                FOREIGN KEY (period_id) REFERENCES periods(period_id),
+                FOREIGN KEY (group_id) REFERENCES groups(group_id)
+            )
+        """)
+
+        self.conn.commit()
+    
+    def populate_periods_table(self):
+        """Seed the ``periods`` and ``days`` lookup tables.
+
+        Inserts the thirteen standard lesson slots and the seven weekday
+        names.  Rows are written with ``INSERT OR IGNORE``, so entries that
+        already exist are left alone.  The changes are committed before
+        returning.
+        """
+        period_times = {
+            '1': ('09:00', '09:40'),
+            '2': ('10:00', '10:40'),
+            '3': ('11:00', '11:40'),
+            '4': ('11:50', '12:30'),
+            '5': ('12:40', '13:20'),
+            '6': ('13:30', '14:10'),
+            '7': ('14:20', '15:00'),
+            '8': ('15:20', '16:00'),
+            '9': ('16:15', '16:55'),
+            '10': ('17:05', '17:45'),
+            '11': ('17:55', '18:35'),
+            '12': ('18:45', '19:20'),
+            '13': ('19:20', '20:00')
+        }
+        days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
+
+        # Lesson slots: the dictionary keys are the period numbers as text
+        slot_rows = [
+            (int(number), begins, finishes)
+            for number, (begins, finishes) in period_times.items()
+        ]
+        self.cursor.executemany(
+            "INSERT OR IGNORE INTO periods (period_number, start_time, end_time) VALUES (?, ?, ?)",
+            slot_rows,
+        )
+
+        # Weekday names
+        self.cursor.executemany(
+            "INSERT OR IGNORE INTO days (name) VALUES (?)",
+            [(weekday,) for weekday in days_of_week],
+        )
+
+        self.conn.commit()
+    
+    def update_database_from_csv(self, auto_update=True):
+        """Import student CSV files found in the ``sample_data`` directory.
+
+        Files whose name contains ``first_sheet``, ``last_sheet`` or
+        ``template`` are ignored.  The remaining ``*.csv`` files are listed in
+        sorted order (``os.listdir`` returns entries in arbitrary order, which
+        made the on-screen numbering unstable between runs).
+
+        Args:
+            auto_update: When True, import every candidate file without
+                asking.  When False, ask for confirmation on stdin and let the
+                user pick files by number (``0`` selects all).
+
+        Returns:
+            None.  Progress and problems are reported with ``print``.
+        """
+        sample_data_dir = "sample_data"
+
+        if not os.path.exists(sample_data_dir):
+            print(f"Directory '{sample_data_dir}' not found.")
+            return
+
+        # Keep only the student distribution files (not the sheets or the template)
+        excluded_markers = ('first_sheet', 'last_sheet', 'template')
+        csv_files = sorted(
+            filename
+            for filename in os.listdir(sample_data_dir)
+            if filename.endswith('.csv')
+            and not any(marker in filename for marker in excluded_markers)
+        )
+
+        if not csv_files:
+            print(f"No student data CSV files found in '{sample_data_dir}' directory.")
+            return
+
+        print(f"Found {len(csv_files)} student data CSV file(s):")
+        for i, filename in enumerate(csv_files, 1):
+            print(f"  {i}. {filename}")
+
+        if auto_update:
+            print("\nAuto-updating database with all student data CSV files...")
+            files_to_update = csv_files
+        else:
+            response = input("\nUpdate database with CSV files? (yes/no): ").lower()
+
+            if response not in ['yes', 'y', 'да']:
+                print("Skipping database update.")
+                return
+
+            print("\n0. Update all files")
+
+            try:
+                selection = input("\nSelect file(s) to update (0 for all, or comma-separated numbers like 1,2,3): ")
+
+                if selection.strip() == '0':
+                    files_to_update = csv_files
+                else:
+                    # Numbers shown to the user are 1-based; out-of-range entries are dropped
+                    indices = [int(x.strip()) - 1 for x in selection.split(',')]
+                    files_to_update = [csv_files[i] for i in indices if 0 <= i < len(csv_files)]
+
+                if not files_to_update:
+                    print("No valid selections made.")
+                    return
+            except ValueError:
+                print("Invalid input. Please enter numbers separated by commas or '0' for all files.")
+                return
+
+        # Populate the periods and days tables first
+        self.populate_periods_table()
+
+        print(f"\nUpdating database with {len(files_to_update)} file(s):")
+        skipped = []
+        for filename in files_to_update:
+            print(f"  - {filename}")
+
+            csv_path = os.path.join(sample_data_dir, filename)
+            print(f"Processing {csv_path}...")
+
+            # The importer returns False when it could not use the file
+            if not self.process_csv_with_teacher_mapping(csv_path):
+                skipped.append(filename)
+
+        # Only claim success when every selected file was actually imported
+        if skipped:
+            print(
+                f"Database update finished; {len(skipped)} of {len(files_to_update)} "
+                f"file(s) were skipped: {', '.join(skipped)}"
+            )
+        else:
+            print("Database updated successfully with selected CSV data.")
+    
+    def process_csv_with_teacher_mapping(self, csv_file):
+        """Import one student-distribution CSV into the normalized tables.
+
+        The file must contain a header row, recognised by a "ФИО"/"Ф.И.О."
+        cell or, failing that, by class and name column titles.  Rows above
+        the header may hold teacher names, which are matched to subject
+        columns by position.  Every row below the header is treated as one
+        student, and each non-empty cell in a subject column becomes a
+        schedule entry for that student.
+
+        Students are looked up by (full name, class) before being inserted, so
+        importing the same file again does not create duplicate student rows.
+
+        Args:
+            csv_file: Path to a UTF-8 encoded CSV file.
+
+        Returns:
+            False when the file does not exist or no header row is found;
+            True otherwise (even when no student row could be imported).
+        """
+        if not os.path.exists(csv_file):
+            return False
+
+        with open(csv_file, 'r', encoding='utf-8') as file:
+            rows = list(csv.reader(file))
+
+        header_idx = self._locate_student_header_row(rows)
+        if header_idx is None:
+            print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
+            return False
+
+        header_row = rows[header_idx]
+        header_subjects = self._subject_columns_from_header(header_row)
+        teacher_subject_map = self._teacher_map_from_preamble(rows, header_idx, header_subjects)
+
+        # The column layout depends only on the header, so resolve it once
+        # instead of once per student row.
+        class_col_idx = next(
+            (idx for idx, header in enumerate(header_row)
+             if any(word in str(header) for word in ("Класс", "класс", "Class", "class"))),
+            None,
+        )
+        # NOTE(review): the fallback header detection also accepts 'имя' /
+        # 'фамилия' titles, which this lookup does not recognise; such files
+        # are accepted but import no students.
+        name_col_idx = next(
+            (idx for idx, header in enumerate(header_row)
+             if "ФИО" in str(header)
+             or "ф.и.о." in str(header).lower()
+             or "name" in str(header).lower()),
+            None,
+        )
+
+        if class_col_idx is None or name_col_idx is None:
+            # Nothing can be imported without both key columns
+            self.conn.commit()
+            return True
+
+        # Columns 0-2 and the class/name columns are metadata, as are
+        # sorting / number / locker columns; everything else is a subject.
+        metadata_columns = {0, 1, 2, class_col_idx, name_col_idx}
+        non_schedule_words = ("сортировка", "номер", "шкафчика", "локера")
+        schedule_columns = [
+            (col_idx, str(header).strip())
+            for col_idx, header in enumerate(header_row)
+            if col_idx not in metadata_columns
+            and not any(word in header.lower() for word in non_schedule_words)
+        ]
+
+        for student_row in rows[header_idx + 1:]:
+            if len(student_row) <= max(class_col_idx, name_col_idx):
+                continue
+
+            class_name = student_row[class_col_idx].strip()
+            name = student_row[name_col_idx].strip()
+            if not (class_name and name and
+                    self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)):
+                continue
+
+            # Reuse an existing student row; insert only when there is none
+            self.cursor.execute(
+                "SELECT student_id FROM students WHERE full_name = ? AND class_name = ?",
+                (name, class_name)
+            )
+            existing = self.cursor.fetchone()
+            if existing is not None:
+                student_id = existing[0]
+            else:
+                self.cursor.execute(
+                    "INSERT INTO students (class_name, full_name) VALUES (?, ?)",
+                    (class_name, name)
+                )
+                student_id = self.cursor.lastrowid
+
+            for col_idx, subject_name in schedule_columns:
+                if col_idx >= len(student_row):
+                    break  # schedule_columns is in ascending column order
+
+                group_assignment = student_row[col_idx].strip()
+                if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
+                    continue
+
+                teacher_name = teacher_subject_map.get(subject_name, f"Default Teacher for {subject_name}")
+                self._process_schedule_entry_with_teacher_mapping(
+                    student_id, group_assignment, subject_name, teacher_name
+                )
+
+        self.conn.commit()
+        return True
+
+    @staticmethod
+    def _locate_student_header_row(rows):
+        """Return the index of the header row in ``rows``, or None if there is none."""
+        # Preferred marker: a cell mentioning ФИО / Ф.И.О.
+        for i, row in enumerate(rows):
+            for cell in row:
+                cell_text = str(cell)
+                if "фио" in cell_text.lower() or "Ф.И.О." in cell_text or "ф.и.о." in cell_text:
+                    return i
+
+        def mentions(row, words):
+            return any(word in str(cell).lower() for cell in row for word in words)
+
+        class_words = ['класс', 'class']
+        name_words = ['имя', 'name', 'фамилия', 'surname']
+
+        # Fallback: the first 10 rows must mention both a class and a name
+        # column; the header is then the first row that mentions both.
+        preview = rows[:10]
+        if (any(mentions(row, class_words) for row in preview) and
+                any(mentions(row, name_words) for row in preview)):
+            for i, row in enumerate(rows):
+                if mentions(row, class_words) and mentions(row, name_words):
+                    return i
+
+        return None
+
+    @staticmethod
+    def _subject_columns_from_header(header_row):
+        """Map column index -> subject name for every header cell that names a subject."""
+        non_subject_titles = ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера']
+        non_subject_fragments = ("ф.и.о", "сортировка", "номер")
+
+        header_subjects = {}
+        for col_idx, title in enumerate(header_row):
+            title = str(title).strip()
+            lowered = title.lower()
+            if (title and
+                    lowered not in non_subject_titles and
+                    not any(fragment in lowered for fragment in non_subject_fragments) and
+                    "№" not in title):
+                header_subjects[col_idx] = title
+        return header_subjects
+
+    @staticmethod
+    def _subject_for_teacher_cell(current_row, j, header_subjects):
+        """Pick the subject for a teacher name found at column ``j``; None if no match."""
+        left_context = str(current_row[j - 1]).strip() if j > 0 else ""
+        right_context = str(current_row[j + 1]).strip() if j < len(current_row) - 1 else ""
+
+        # First priority: a non-empty right neighbour that sits above a subject column
+        if right_context and j + 1 in header_subjects:
+            return header_subjects[j + 1]
+
+        # Second priority: the left neighbour labels this cell as a teacher/department
+        if left_context and any(keyword in left_context.lower()
+                                for keyword in ['учитель', 'teacher', 'кафедра', 'department']):
+            if j + 1 in header_subjects:
+                return header_subjects[j + 1]
+            return header_subjects.get(j)
+
+        # Third priority: the subject column directly below the teacher cell
+        return header_subjects.get(j)
+
+    def _teacher_map_from_preamble(self, rows, header_idx, header_subjects):
+        """Build a subject -> teacher name map from the rows above the header."""
+        teacher_subject_map = {}
+
+        def remember(subject, teacher):
+            # The first real teacher found for a subject wins
+            if subject and (subject not in teacher_subject_map or
+                            'Default Teacher for' in teacher_subject_map.get(subject, '')):
+                teacher_subject_map[subject] = teacher
+
+        # Pass 1: up to the first 15 rows of the file that precede the header
+        for current_row in rows[:min(15, header_idx)]:
+            for j, cell_value in enumerate(current_row):
+                cell_str = str(cell_value).strip()
+
+                if self._is_likely_teacher_name(cell_str):
+                    remember(self._subject_for_teacher_cell(current_row, j, header_subjects), cell_str)
+                elif '\n' in cell_str or '\\n' in cell_str:
+                    # Several names in one cell, separated by real or escaped newlines
+                    for part in cell_str.replace('\\n', '\n').split('\n'):
+                        part = part.strip()
+                        if part and self._is_likely_teacher_name(part):
+                            remember(self._subject_for_teacher_cell(current_row, j, header_subjects), part)
+
+        # Pass 2: the 5 rows directly above the header, matched purely by column
+        for current_row in rows[max(0, header_idx - 5):header_idx]:
+            for j, cell_value in enumerate(current_row):
+                cell_str = str(cell_value).strip()
+                if self._is_likely_teacher_name(cell_str) and j in header_subjects:
+                    remember(header_subjects[j], cell_str)
+
+        # Final validation: drop anything that does not look like a teacher name
+        validated_teacher_subject_map = {}
+        for subject, teacher in teacher_subject_map.items():
+            if self._is_likely_teacher_name(teacher):
+                validated_teacher_subject_map[subject] = teacher
+            else:
+                print(f"Warning: Invalid teacher name '{teacher}' detected for subject '{subject}', skipping...")
+
+        return validated_teacher_subject_map
+    
+    def _is_valid_student_record_by_cols(self, row, class_col_idx, name_col_idx):
+        """Tell whether ``row`` holds a usable class / student-name pair.
+
+        Args:
+            row: One CSV row as a list of strings.
+            class_col_idx: Index of the class column.
+            name_col_idx: Index of the student-name column.
+
+        Returns:
+            True when both cells are non-empty after stripping, differ from
+            each other, and the name cell does not itself look like a class
+            designation; False otherwise, including when the row is too short.
+        """
+        highest_needed = max(class_col_idx, name_col_idx)
+        if not len(row) > highest_needed:
+            return False
+
+        class_value = row[class_col_idx].strip()
+        name_value = row[name_col_idx].strip()
+
+        # Digits followed by exactly one capital letter, e.g. 6А, 11А, 4B
+        looks_like_class = re.compile(r'^\d+[А-ЯA-Z]$').match
+
+        if looks_like_class(class_value):
+            # A proper class cell: only require a name that is not the class again
+            return bool(name_value) and name_value != class_value
+
+        if looks_like_class(name_value):
+            # The name cell holds a class designation, so this is not a student
+            return False
+
+        if not class_value or not name_value:
+            return False
+        return class_value != name_value
+    
+    def _process_schedule_entry_with_teacher_mapping(self, student_id, group_info, subject_info, teacher_name):
+        """Store one schedule entry, creating subject/teacher/group rows as needed.
+
+        Nothing is written when the group cell is empty, ``nan`` or ``-``.
+        An entry that already exists for the same student, subject, teacher
+        and group is not inserted again: the day is a random placeholder, so
+        ``INSERT OR IGNORE`` alone could never detect a repeated import.
+
+        The day is chosen at random from Monday-Friday and the period defaults
+        to period 1, because the CSV does not carry that information.  When
+        the ``periods`` table is empty the entry is stored with a NULL period
+        instead of failing.
+
+        Args:
+            student_id: Primary key of the student the entry belongs to.
+            group_info: Raw group cell taken from the CSV.
+            subject_info: Subject name taken from the CSV header; blank values
+                become "General Class".
+            teacher_name: Teacher to associate with the subject.
+
+        Returns:
+            None.  The caller is responsible for committing.
+        """
+        import random  # not imported at module level
+
+        subject_name = subject_info.strip() or "General Class"
+        group_assignment = group_info.strip()
+
+        # Placeholder cells carry no group assignment
+        if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
+            return
+
+        # Subject: insert if missing, then read back its id
+        self.cursor.execute("INSERT OR IGNORE INTO subjects (name) VALUES (?)", (subject_name,))
+        self.cursor.execute("SELECT subject_id FROM subjects WHERE name = ?", (subject_name,))
+        subject_id = self.cursor.fetchone()[0]
+
+        # Teacher: same pattern; a NULL name is rejected by the NOT NULL
+        # constraint (and ignored), in which case the lookup finds nothing
+        self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (teacher_name,))
+        self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (teacher_name,))
+        teacher_result = self.cursor.fetchone()
+        if teacher_result:
+            teacher_id = teacher_result[0]
+        else:
+            default_teacher = "Неизвестный преподаватель"
+            self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (default_teacher,))
+            self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (default_teacher,))
+            teacher_id = self.cursor.fetchone()[0]
+
+        # Group: normalise the raw cell before storing it
+        group_name = self._clean_group_name(group_assignment)
+        self.cursor.execute("INSERT OR IGNORE INTO groups (name) VALUES (?)", (group_name,))
+        self.cursor.execute("SELECT group_id FROM groups WHERE name = ?", (group_name,))
+        group_id = self.cursor.fetchone()[0]
+
+        # Skip entries that were already imported (see docstring)
+        self.cursor.execute(
+            "SELECT 1 FROM schedule WHERE student_id = ? AND subject_id = ? "
+            "AND teacher_id = ? AND group_id = ? LIMIT 1",
+            (student_id, subject_id, teacher_id, group_id)
+        )
+        if self.cursor.fetchone() is not None:
+            return
+
+        # Placeholder day: picked at random until real day data is available
+        days_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
+        selected_day = random.choice(days_list)
+        self.cursor.execute("INSERT OR IGNORE INTO days (name) VALUES (?)", (selected_day,))
+        self.cursor.execute("SELECT day_id FROM days WHERE name = ?", (selected_day,))
+        day_id = self.cursor.fetchone()[0]
+
+        # Placeholder period: period 1, else any period, else NULL
+        self.cursor.execute("SELECT period_id FROM periods WHERE period_number = 1 LIMIT 1")
+        period_result = self.cursor.fetchone()
+        if period_result is None:
+            self.cursor.execute("SELECT period_id FROM periods LIMIT 1")
+            period_result = self.cursor.fetchone()
+        period_id = period_result[0] if period_result is not None else None
+
+        self.cursor.execute("""
+            INSERT OR IGNORE INTO schedule (student_id, subject_id, teacher_id, day_id, period_id, group_id)
+            VALUES (?, ?, ?, ?, ?, ?)
+        """, (student_id, subject_id, teacher_id, day_id, period_id, group_id))
+    
+    def _clean_group_name(self, raw_group_data):
+        """Return a group name derived from a raw CSV cell.
+
+        The stripped value is kept as is when it starts like a class
+        designation (digits followed by a capital letter), contains a group
+        keyword or a hyphen, or contains a known subject name.  Anything else
+        is replaced by a generic ``Group_<n>`` name.
+
+        The generic name is derived from a CRC32 of the text, so the same cell
+        always maps to the same group.  The built-in ``hash()`` is salted per
+        process for strings, which produced a different group name on every
+        run.
+
+        Args:
+            raw_group_data: Cell text describing the group.
+
+        Returns:
+            The cleaned or generated group name.
+        """
+        import zlib  # not imported at module level
+
+        cleaned = raw_group_data.strip()
+
+        # Class designation such as "6А" or "11B ..."
+        if re.match(r'^\d+[А-ЯA-Z]', cleaned):
+            return cleaned
+
+        # Common group keywords (matched case-insensitively) or a hyphen
+        group_indicators = ['кл', 'class', 'club', 'track', 'group', 'module', '-']
+        lowered = cleaned.lower()
+        if any(indicator in lowered for indicator in group_indicators):
+            return cleaned
+
+        # Known subject names (matched case-sensitively)
+        subject_indicators = ['ICT', 'English', 'Math', 'Physics', 'Chemistry', 'Biology', 'Science']
+        if any(indicator in cleaned for indicator in subject_indicators):
+            return cleaned
+
+        # Unrecognised text: generate a stable generic name
+        return f"Group_{zlib.crc32(cleaned.encode('utf-8')) % 10000}"
+    
+    def _is_likely_teacher_name(self, text):
+        """Heuristically decide whether ``text`` is a person's (teacher's) name.
+
+        Args:
+            text: Cell text to classify; may be empty or None.
+
+        Returns:
+            False for values shorter than five characters, known placeholder
+            words and anything matching an exclusion pattern (group, track or
+            club labels, text containing digits).  True when the text matches
+            one of the name patterns, or consists of at least two words whose
+            multi-character words all start with an upper-case letter.
+        """
+        if not text:
+            return False
+
+        text = text.strip()
+        if len(text) < 5:  # too short to be a name
+            return False
+
+        # Placeholder values that show up in the CSV instead of a name
+        common_non_names = ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '', 'Е4 Е5', 'E4 E5', 'группа', 'group']
+        if text.lower() in common_non_names:
+            return False
+
+        # Patterns for things that are not teachers (applied case-insensitively)
+        exclusion_patterns = [
+            r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$',      # two letter+number codes (Cyrillic)
+            r'^[A-Z]\d+\s+[A-Z]\d+$',       # two letter+number codes (Latin)
+            r'.*[Tt]rack.*',                 # track labels
+            r'.*[Gg]roup.*',                 # group labels
+            r'.*\d+[А-ЯA-Z]\d*$',           # ends with a number-letter combination
+            r'^[А-ЯЁA-Z].*\d+',              # starts with a letter and contains digits
+            r'.*[Cc]lub.*',                  # club labels
+        ]
+        if any(re.match(rejected, text, re.IGNORECASE) for rejected in exclusion_patterns):
+            return False
+
+        # Recognised name layouts (case-sensitive)
+        teacher_patterns = [
+            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.\s*[А-ЯЁ]\.$',     # Иванов А.А.
+            r'^[А-ЯЁ]\.\s*[А-ЯЁ]\.\s+[А-ЯЁ][а-яё]+$',     # А.А. Иванов
+            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$', # surname, name, patronymic
+            r'^[A-Z][a-z]+\s+[A-Z][a-z]+$',               # John Smith
+            r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$',        # Smith J.J.
+            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$',          # Russian name without patronymic
+        ]
+        if any(re.match(accepted, text) for accepted in teacher_patterns):
+            return True
+
+        # Last resort: two or more words, each multi-character word capitalised
+        words = text.split()
+        return len(words) >= 2 and all(word[0].isupper() for word in words if len(word) > 1)
+    
+    def _is_likely_subject_label(self, text):
+        """Tell whether ``text`` mentions a known subject name or abbreviation.
+
+        Matching is a case-insensitive substring test against two keyword
+        lists (for example 'Матем.', 'Информ.', 'Англ.яз', 'ICT', 'Physics').
+
+        Args:
+            text: Cell text to classify; may be empty or None.
+
+        Returns:
+            True when any keyword occurs in the stripped, lower-cased text;
+            False otherwise, and for input shorter than two characters.
+        """
+        if not text or len(text) < 2:
+            return False
+
+        # Subject names and abbreviations, Russian and English
+        subject_patterns = [
+            'Матем.', 'Информ.', 'Англ.яз', 'Русск.яз', 'Физика', 'Химия', 'Биол', 'История', 
+            'Общество', 'География', 'Литер', 'Физкульт', 'Технотрек', 'Лидерство', 
+            'Спорт. клуб', 'ОРКСЭ', 'Китайск', 'Немецк', 'Француз', 'Speaking club', 'Maths',
+            'ICT', 'Geography', 'Physics', 'Robotics', 'Culinary', 'Science', 'AI Core', 'VR/AR',
+            'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Science', 'Robotics',
+            'Culinary', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype', 
+            'MediaCom', 'Robotics Track', 'Culinary Track', 'Science Track', 'AI Core Track',
+            'VR/AR Track', 'CyberSafety Track', 'Business Track', 'Design Track', 'Prototype Track',
+            'MediaCom Track', 'Math', 'Algebra', 'Geometry', 'Calculus', 'Statistics', 'Coding',
+            'Programming', 'Algorithm', 'Logic', 'Robotics', 'Physical Education', 'PE', 'Sports',
+            'Swimming', 'Fitness', 'Gymnastics', 'Climbing', 'Games', 'Art', 'Music', 'Dance',
+            'Karate', 'Judo', 'Martial Arts', 'Chess', 'Leadership', 'Entrepreneurship'
+        ]
+
+        # Additional lower-case keywords seen in the data
+        specific_subjects = ['матем.', 'информ.', 'англ.яз', 'русск.яз', 'каб.', 'business', 'maths',
+                             'speaking', 'ict', 'geography', 'physics', 'robotics', 'science', 'ai core',
+                             'vr/ar', 'cybersafety', 'design', 'prototype', 'mediacom', 'culinary',
+                             'physical education', 'pe', 'sports', 'swimming', 'fitness', 'gymnastics',
+                             'climbing', 'games', 'art', 'music', 'dance', 'karate', 'chess', 'leadership']
+
+        normalized = text.strip().lower()
+        if any(keyword.lower() in normalized for keyword in subject_patterns):
+            return True
+        return any(keyword in normalized for keyword in specific_subjects)
+    
+    def _find_matching_subject_in_header_from_list(self, subject_label, header_subjects, header_row):
+        """Resolve a (possibly abbreviated) subject label to a header subject name.
+
+        Matching is attempted in three stages, returning on the first hit:
+
+        1. substring match (either direction) against ``header_subjects``;
+        2. substring match (either direction) against every cell of
+           ``header_row``;
+        3. shared well-known stem ('матем', 'англ', ...) between the label
+           and an entry of ``header_subjects``.
+
+        Args:
+            subject_label: Label text such as 'Матем.' or 'Англ.яз'.
+            header_subjects: Iterable of ``(column_index, full_subject_name)``
+                pairs; only the name is used here.
+            header_row: Iterable of raw header cells (any type, may be empty).
+
+        Returns:
+            The matching subject name (for ``header_row`` hits, the stripped
+            cell text), or ``None`` when nothing matches.
+        """
+        if not subject_label:
+            return None
+
+        # Normalize: drop dots and expand a trailing 'яз' abbreviation only.
+        # The word boundary keeps an already spelled-out 'язык' intact.
+        label = subject_label.strip().lower().replace('.', '')
+        label = re.sub(r'яз\b', 'язык', label)
+        if not label:
+            # An empty needle is a substring of everything; refuse to match.
+            return None
+
+        # Stage 1: direct match against the known header subjects.
+        for _col_idx, full_subj in header_subjects:
+            full_lower = full_subj.strip().lower()
+            if full_lower and (label in full_lower or full_lower in label):
+                return full_subj
+
+        # Stage 2: partial match against the whole header row. Empty cells are
+        # skipped: '' is contained in every label and would always "match".
+        for header_item in header_row:
+            if header_item is None:
+                continue
+            header_text = str(header_item).strip()
+            header_lower = header_text.lower()
+            if header_lower and (label in header_lower or header_lower in label):
+                return header_text
+
+        # Stage 3: label and header subject share a common subject stem.
+        subject_stems = (
+            'матем', 'информ', 'англ', 'русск', 'физик',
+            'хим', 'биол', 'истор', 'общ', 'географ',
+        )
+        for _col_idx, full_subj in header_subjects:
+            full_lower = full_subj.lower()
+            if any(stem in label and stem in full_lower for stem in subject_stems):
+                return full_subj
+
+        return None
+    
+    def find_student(self, name_query):
+        """Search for students whose full name contains ``name_query``.
+
+        The query text is matched literally: the LIKE wildcards ``%`` and
+        ``_`` inside ``name_query`` are escaped so they only match themselves.
+
+        Args:
+            name_query: Fragment of the student's full name.
+
+        Returns:
+            A list of up to 10 ``(full_name, class_name)`` tuples.
+        """
+        # Escape LIKE metacharacters ('!' is the escape character itself).
+        literal_query = (
+            str(name_query)
+            .replace('!', '!!')
+            .replace('%', '!%')
+            .replace('_', '!_')
+        )
+        # NOTE: SQLite's built-in LIKE is case-insensitive for ASCII only, so
+        # non-ASCII (e.g. Cyrillic) names must be typed with matching case.
+        self.cursor.execute("""
+            SELECT s.full_name, s.class_name 
+            FROM students s
+            WHERE s.full_name LIKE ? ESCAPE '!'
+            LIMIT 10
+        """, (f'%{literal_query}%',))
+
+        return self.cursor.fetchall()
+    
+    def get_current_class(self, student_name, current_day, current_time):
+        """Look up the lesson a student has at the given day and time.
+
+        Args:
+            student_name: Exact ``students.full_name`` value.
+            current_day: Exact ``days.name`` value.
+            current_time: Time value compared against the period's
+                ``start_time`` / ``end_time`` columns (inclusive bounds).
+
+        Returns:
+            The first matching row as
+            ``(subject_name, teacher_name, start_time, end_time)``,
+            or ``None`` when no scheduled period covers ``current_time``.
+        """
+        lesson_query = """
+            SELECT sub.name, t.name, p.start_time, p.end_time 
+            FROM schedule sch
+            JOIN students s ON sch.student_id = s.student_id
+            JOIN subjects sub ON sch.subject_id = sub.subject_id
+            JOIN teachers t ON sch.teacher_id = t.teacher_id
+            JOIN days d ON sch.day_id = d.day_id
+            JOIN periods p ON sch.period_id = p.period_id
+            JOIN groups g ON sch.group_id = g.group_id
+            WHERE s.full_name = ? 
+            AND d.name = ? 
+            AND p.start_time <= ? 
+            AND p.end_time >= ?
+        """
+        # current_time is bound twice: once per side of the period window.
+        bind_values = (student_name, current_day, current_time, current_time)
+        self.cursor.execute(lesson_query, bind_values)
+        current_lesson = self.cursor.fetchone()
+        return current_lesson
+    
+    def close(self):
+        """Release the SQLite connection held in ``self.conn``."""
+        connection = self.conn
+        connection.close()
+
+# Main execution - just setup database
+if __name__ == "__main__":
+    db = SchoolScheduleDB()
+    try:
+        # Auto-update mode is enabled when '--auto' is the first CLI argument.
+        auto_update = len(sys.argv) > 1 and sys.argv[1] == '--auto'
+        db.update_database_from_csv(auto_update=auto_update)
+    finally:
+        # Always release the connection, even if the CSV import raises.
+        db.close()