1003 lines
51 KiB
Python
1003 lines
51 KiB
Python
#!/usr/bin/env python
|
||
"""
|
||
database.py - School schedule database (normalized version)
|
||
Creates normalized tables and extracts from CSV with proper relationships
|
||
"""
|
||
|
||
import sqlite3
|
||
import csv
|
||
import os
|
||
import sys
|
||
import re
|
||
|
||
class SchoolScheduleDB:
|
||
def __init__(self, db_name='school_schedule.db'):
|
||
self.conn = sqlite3.connect(db_name)
|
||
self.cursor = self.conn.cursor()
|
||
# Initialize database tables
|
||
self.create_tables()
|
||
|
||
def normalize_class_name(self, class_name):
    """Replace Cyrillic look-alike capitals in a class name with Latin ones.

    The Cyrillic letters U+0410, U+0412 and U+0421 become the Latin
    ``A``, ``B`` and ``C``; every other character is left alone.
    Falsy input (``None`` or an empty string) is returned unchanged.
    """
    if not class_name:
        return class_name

    # One translation pass instead of three chained replace() calls.
    lookalikes = str.maketrans({"\u0410": "A", "\u0412": "B", "\u0421": "C"})
    return class_name.translate(lookalikes)
|
||
|
||
def create_tables(self):
|
||
"""Create normalized tables with proper relationships"""
|
||
# Teachers table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS teachers (
|
||
teacher_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
name TEXT UNIQUE NOT NULL,
|
||
email TEXT,
|
||
phone TEXT
|
||
)
|
||
""")
|
||
|
||
# Subjects table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS subjects (
|
||
subject_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
name TEXT UNIQUE NOT NULL,
|
||
description TEXT
|
||
)
|
||
""")
|
||
|
||
# Days table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS days (
|
||
day_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
name TEXT UNIQUE NOT NULL -- e.g., Monday, Tuesday, etc.
|
||
)
|
||
""")
|
||
|
||
# Periods table - with proper unique constraint
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS periods (
|
||
period_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
period_number INTEGER,
|
||
start_time TEXT,
|
||
end_time TEXT,
|
||
UNIQUE(period_number, start_time, end_time)
|
||
)
|
||
""")
|
||
|
||
# Groups table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS groups (
|
||
group_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
name TEXT UNIQUE NOT NULL,
|
||
description TEXT,
|
||
class_name TEXT
|
||
)
|
||
""")
|
||
|
||
# Students table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS students (
|
||
student_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
class_name TEXT,
|
||
full_name TEXT NOT NULL,
|
||
UNIQUE(full_name, class_name) -- Prevent duplicate student entries
|
||
)
|
||
""")
|
||
|
||
# Homeroom teachers table
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS homeroom_teachers (
|
||
homeroom_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
class_name TEXT UNIQUE,
|
||
teacher_name TEXT,
|
||
classroom TEXT,
|
||
parent_meeting_room TEXT,
|
||
internal_number TEXT,
|
||
mobile_number TEXT
|
||
)
|
||
""")
|
||
|
||
# Schedule table with foreign key relationships
|
||
self.cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS schedule (
|
||
entry_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
student_id INTEGER,
|
||
subject_id INTEGER,
|
||
teacher_id INTEGER,
|
||
day_id INTEGER,
|
||
period_id INTEGER,
|
||
group_id INTEGER,
|
||
FOREIGN KEY (student_id) REFERENCES students(student_id),
|
||
FOREIGN KEY (subject_id) REFERENCES subjects(subject_id),
|
||
FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id),
|
||
FOREIGN KEY (day_id) REFERENCES days(day_id),
|
||
FOREIGN KEY (period_id) REFERENCES periods(period_id),
|
||
FOREIGN KEY (group_id) REFERENCES groups(group_id)
|
||
)
|
||
""")
|
||
|
||
self.conn.commit()
|
||
|
||
def populate_periods_table(self):
|
||
"""Populate the periods table with standard school periods"""
|
||
period_times = {
|
||
'1': ('09:00', '09:40'),
|
||
'2': ('10:00', '10:40'),
|
||
'3': ('11:00', '11:40'),
|
||
'4': ('11:50', '12:30'),
|
||
'5': ('12:40', '13:20'),
|
||
'6': ('13:30', '14:10'),
|
||
'7': ('14:20', '15:00'),
|
||
'8': ('15:20', '16:00'),
|
||
'9': ('16:15', '16:55'),
|
||
'10': ('17:05', '17:45'),
|
||
'11': ('17:55', '18:35'),
|
||
'12': ('18:45', '19:20'),
|
||
'13': ('19:20', '20:00')
|
||
}
|
||
|
||
for period_num, (start_time, end_time) in period_times.items():
|
||
self.cursor.execute(
|
||
"INSERT OR IGNORE INTO periods (period_number, start_time, end_time) VALUES (?, ?, ?)",
|
||
(int(period_num), start_time, end_time)
|
||
)
|
||
|
||
# Add days of the week
|
||
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
|
||
for day in days_of_week:
|
||
self.cursor.execute("INSERT OR IGNORE INTO days (name) VALUES (?)", (day,))
|
||
|
||
self.conn.commit()
|
||
|
||
def update_database_from_csv(self, auto_update=False):
|
||
"""Update the database from CSV files in the sample_data directory"""
|
||
# Define the sample_data directory path
|
||
sample_data_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/sample_data"
|
||
|
||
# Check if the directory exists
|
||
if not os.path.exists(sample_data_dir):
|
||
print(f"Directory {sample_data_dir} does not exist.")
|
||
return
|
||
|
||
# Find all CSV files in the sample_data directory
|
||
csv_files = [f for f in os.listdir(sample_data_dir) if f.lower().endswith('.csv')]
|
||
|
||
if not csv_files:
|
||
print(f"No CSV files found in {sample_data_dir}")
|
||
return
|
||
|
||
print(f"Found {len(csv_files)} student data CSV file(s):")
|
||
for i, file in enumerate(csv_files, 1):
|
||
print(f" {i}. {file}")
|
||
|
||
if auto_update:
|
||
print("\nAuto-updating database with all student data CSV files...")
|
||
print("\nUpdating database with {} file(s):".format(len(csv_files)))
|
||
for file in csv_files:
|
||
print(f" - {file}")
|
||
csv_path = os.path.join(sample_data_dir, file)
|
||
self.process_csv_with_teacher_mapping(csv_path)
|
||
else:
|
||
print("\nChoose an option:")
|
||
print("1. Update all CSV files")
|
||
print("2. Select specific CSV files")
|
||
print("3. Cancel")
|
||
|
||
choice = input("Enter your choice (1-3): ").strip()
|
||
|
||
if choice == "1":
|
||
print("\nUpdating database with all CSV files...")
|
||
for file in csv_files:
|
||
print(f"Processing {file}...")
|
||
csv_path = os.path.join(sample_data_dir, file)
|
||
self.process_csv_with_teacher_mapping(csv_path)
|
||
elif choice == "2":
|
||
print("\nAvailable CSV files:")
|
||
for i, file in enumerate(csv_files, 1):
|
||
print(f" {i}. {file}")
|
||
|
||
selections = input("\nEnter the numbers of the files to process (comma-separated): ")
|
||
try:
|
||
indices = [int(x.strip()) - 1 for x in selections.split(',')]
|
||
valid_indices = [i for i in indices if 0 <= i < len(csv_files)]
|
||
|
||
for i in valid_indices:
|
||
file = csv_files[i]
|
||
print(f"Processing {file}...")
|
||
csv_path = os.path.join(sample_data_dir, file)
|
||
self.process_csv_with_teacher_mapping(csv_path)
|
||
except ValueError:
|
||
print("Invalid input. Please enter comma-separated numbers.")
|
||
else:
|
||
print("Update cancelled.")
|
||
|
||
# Update teachers from the Teachers.csv file
|
||
teachers_file = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/Teachers.csv"
|
||
if os.path.exists(teachers_file):
|
||
print("Processing teachers from Teachers.csv...")
|
||
self.update_teachers_from_cheat_sheet(teachers_file)
|
||
else:
|
||
print("Teachers.csv file not found, skipping...")
|
||
|
||
# Update homeroom teachers from the dedicated file
|
||
homeroom_file = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/Projects/Homeroom_teachers.csv"
|
||
if os.path.exists(homeroom_file):
|
||
print("Processing homeroom teachers...")
|
||
self.update_homeroom_teachers(homeroom_file)
|
||
else:
|
||
print("Homeroom teachers file not found, skipping...")
|
||
|
||
print("Database updated successfully with selected CSV data.")
|
||
|
||
def update_homeroom_teachers(self, homeroom_file):
|
||
"""Update homeroom teachers from the dedicated CSV file"""
|
||
with open(homeroom_file, 'r', encoding='utf-8') as file:
|
||
reader = csv.DictReader(file)
|
||
|
||
for row in reader:
|
||
# Normalize the class name to handle Cyrillic/Latin differences
|
||
normalized_class = self.normalize_class_name(row['Class'])
|
||
self.cursor.execute("""
|
||
INSERT OR REPLACE INTO homeroom_teachers
|
||
(class_name, teacher_name, classroom, parent_meeting_room, internal_number, mobile_number)
|
||
VALUES (?, ?, ?, ?, ?, ?)
|
||
""", (
|
||
normalized_class,
|
||
row['Homeroom Teacher'],
|
||
row['Classroom'],
|
||
row['Parent Meeting Room'],
|
||
row['Internal Number'],
|
||
row['Mobile Number']
|
||
))
|
||
|
||
self.conn.commit()
|
||
print("Homeroom teachers updated successfully.")
|
||
|
||
def process_csv_with_teacher_mapping(self, csv_file):
|
||
"""Process CSV with teacher-subject mapping based on positional order"""
|
||
if not os.path.exists(csv_file):
|
||
return False
|
||
|
||
with open(csv_file, 'r', encoding='utf-8') as file:
|
||
reader = csv.reader(file)
|
||
rows = list(reader)
|
||
|
||
# Identify header row - look for the row containing "ФИО" (full name) or similar indicators
|
||
header_idx = None
|
||
for i, row in enumerate(rows):
|
||
for cell in row:
|
||
if "ФИО" in str(cell) or "фио" in str(cell).lower() or "Ф.И.О." in str(cell) or "ф.и.о." in str(cell):
|
||
header_idx = i
|
||
break
|
||
if header_idx is not None:
|
||
break
|
||
|
||
if header_idx is None:
|
||
# Check if this file contains class and name columns that identify it as a student data file
|
||
# Even if the header doesn't contain ФИО, we might still be able to identify student data
|
||
has_class_indicators = any(
|
||
any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class'])
|
||
for row in rows[:min(len(rows), 10)] # Check first 10 rows
|
||
)
|
||
|
||
has_name_indicators = any(
|
||
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname'])
|
||
for row in rows[:min(len(rows), 10)] # Check first 10 rows
|
||
)
|
||
|
||
if has_class_indicators and has_name_indicators:
|
||
# Try to find the header row by looking for class and name indicators
|
||
for i, row in enumerate(rows):
|
||
if any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) and \
|
||
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']):
|
||
header_idx = i
|
||
break
|
||
|
||
if header_idx is None:
|
||
print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
|
||
return False
|
||
|
||
# Build a mapping of subject names in the header row
|
||
header_row = rows[header_idx]
|
||
header_subjects = {}
|
||
for col_idx, subject_name in enumerate(header_row):
|
||
subject_name = str(subject_name).strip()
|
||
if (subject_name and
|
||
subject_name.lower() not in ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкaфчика', 'локера'] and
|
||
subject_name.strip() != "" and
|
||
"ф.и.о" not in subject_name.lower() and
|
||
"сортировка" not in subject_name.lower() and
|
||
"номер" not in subject_name.lower() and
|
||
"№" not in subject_name):
|
||
header_subjects[col_idx] = subject_name # Map column index to subject name
|
||
|
||
# IMPROVED TEACHER-SUBJECT MAPPING: Extract teacher-subject pairs from the first rows
|
||
# Match base subjects to teachers and then map to header subjects
|
||
base_subject_teacher_map = {}
|
||
|
||
# Look through the first rows to find teacher-subject pairs
|
||
for i in range(min(len(rows), header_idx)): # Only go up to header row
|
||
current_row = rows[i]
|
||
|
||
# Process the row in pairs of (subject, teacher, group_info) pattern
|
||
j = 0
|
||
while j < len(current_row) - 1:
|
||
subject_cell = current_row[j].strip() if j < len(current_row) else ""
|
||
teacher_cell = current_row[j + 1].strip() if j + 1 < len(current_row) else ""
|
||
group_cell = current_row[j + 2].strip() if j + 2 < len(current_row) else ""
|
||
|
||
# Check if the first cell is a subject, the second is a teacher, and the third is a group
|
||
if (subject_cell and self._is_likely_subject_name_simple(subject_cell) and
|
||
teacher_cell and self._is_likely_teacher_name_enhanced(teacher_cell) and
|
||
group_cell and self._is_likely_group_identifier(group_cell)):
|
||
|
||
# Add to the base subject teacher map (if multiple teachers for same subject, store all)
|
||
if subject_cell not in base_subject_teacher_map:
|
||
base_subject_teacher_map[subject_cell] = []
|
||
if teacher_cell not in base_subject_teacher_map[subject_cell]:
|
||
base_subject_teacher_map[subject_cell].append(teacher_cell)
|
||
|
||
# Move to the next potential triplet (subject, teacher, group_info)
|
||
j += 3 # Skip subject, teacher, and group info
|
||
|
||
# Also check the row immediately before the header row for additional teacher-subject pairs
|
||
if header_idx > 0:
|
||
prev_row = rows[header_idx - 1]
|
||
j = 0
|
||
while j < len(prev_row) - 1:
|
||
subject_cell = prev_row[j].strip() if j < len(prev_row) else ""
|
||
teacher_cell = prev_row[j + 1].strip() if j + 1 < len(prev_row) else ""
|
||
group_cell = prev_row[j + 2].strip() if j + 2 < len(prev_row) else ""
|
||
|
||
# Check if the first cell is a subject, the second is a teacher, and the third is a group
|
||
if (subject_cell and self._is_likely_subject_name_simple(subject_cell) and
|
||
teacher_cell and self._is_likely_teacher_name_enhanced(teacher_cell) and
|
||
group_cell and self._is_likely_group_identifier(group_cell)):
|
||
|
||
# Add to the base subject teacher map (if multiple teachers for same subject, store all)
|
||
if subject_cell not in base_subject_teacher_map:
|
||
base_subject_teacher_map[subject_cell] = []
|
||
if teacher_cell not in base_subject_teacher_map[subject_cell]:
|
||
base_subject_teacher_map[subject_cell].append(teacher_cell)
|
||
|
||
# Move to the next potential triplet (subject, teacher, group_info)
|
||
j += 3 # Skip subject, teacher, and group info
|
||
|
||
# Now map the header subjects to the teachers using base subject matching
|
||
teacher_subject_map = {}
|
||
for col_idx, header_subject in header_subjects.items():
|
||
# Find the base subject that corresponds to this header subject
|
||
base_subject = self._find_base_subject(header_subject, base_subject_teacher_map.keys())
|
||
|
||
if base_subject and base_subject in base_subject_teacher_map:
|
||
# Use the first teacher for this base subject
|
||
teacher_subject_map[header_subject] = base_subject_teacher_map[base_subject][0]
|
||
|
||
# Process each student row
|
||
for student_row in rows[header_idx + 1:]:
|
||
# Determine the structure dynamically based on the header
|
||
class_col_idx = None
|
||
name_col_idx = None
|
||
|
||
# Find the index of the class column (usually called "Класс")
|
||
for idx, header in enumerate(header_row):
|
||
if "Класс" in str(header) or "класс" in str(header) or "Class" in str(header) or "class" in str(header).lower():
|
||
class_col_idx = idx
|
||
break
|
||
|
||
# Find the index of the name column (usually called "ФИО")
|
||
for idx, header in enumerate(header_row):
|
||
if "ФИО" in str(header) or "ф.и.о." in str(header).lower() or "name" in str(header).lower():
|
||
name_col_idx = idx
|
||
break
|
||
|
||
# If we couldn't find the columns properly, skip this row
|
||
if class_col_idx is None or name_col_idx is None:
|
||
continue
|
||
|
||
# Check if this row has valid data in the expected columns
|
||
if (len(student_row) > max(class_col_idx, name_col_idx) and
|
||
student_row[class_col_idx].strip() and # class name exists
|
||
student_row[name_col_idx].strip() and # student name exists
|
||
self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)):
|
||
|
||
name = student_row[name_col_idx].strip() # Name column
|
||
class_name = student_row[class_col_idx].strip() # Class column
|
||
|
||
# Normalize the class name to handle Cyrillic/Latin differences
|
||
normalized_class = self.normalize_class_name(class_name)
|
||
|
||
# Insert student into the database (using INSERT OR REPLACE to prevent duplicates)
|
||
self.cursor.execute(
|
||
"INSERT OR REPLACE INTO students (class_name, full_name) VALUES (?, ?)",
|
||
(normalized_class, name)
|
||
)
|
||
|
||
# Get the student_id for this student
|
||
self.cursor.execute("SELECT student_id FROM students WHERE full_name = ? AND class_name = ?", (name, normalized_class))
|
||
student_id_result = self.cursor.fetchone()
|
||
if student_id_result is None:
|
||
continue
|
||
student_id = student_id_result[0]
|
||
|
||
# Process schedule data for this student
|
||
# Go through each column to find subject and group info
|
||
for col_idx, cell_value in enumerate(student_row):
|
||
if cell_value and col_idx < len(header_row):
|
||
# Get the subject from the header
|
||
subject_header = header_row[col_idx] if col_idx < len(header_row) else ""
|
||
|
||
# Skip columns that don't contain schedule information
|
||
if (col_idx == 0 or col_idx == 1 or col_idx == 2 or col_idx == class_col_idx or col_idx == name_col_idx or # skip metadata cols
|
||
"сортировка" in subject_header.lower() or
|
||
"номер" in subject_header.lower() or
|
||
"шкaфчика" in subject_header.lower() or
|
||
"локера" in subject_header.lower()):
|
||
continue
|
||
|
||
# Extract group information from the cell
|
||
group_assignment = cell_value.strip()
|
||
|
||
if group_assignment and group_assignment.lower() != "nan" and group_assignment != "-" and group_assignment != "":
|
||
# Find the teacher associated with this subject
|
||
subject_name = str(subject_header).strip()
|
||
teacher_name = teacher_subject_map.get(subject_name, f"Default Teacher for {subject_name}")
|
||
|
||
# Insert the entities into their respective tables first
|
||
# Then get their IDs to create the schedule entry
|
||
self._process_schedule_entry_with_teacher_mapping(
|
||
student_id, group_assignment, subject_name, teacher_name
|
||
)
|
||
|
||
self.conn.commit()
|
||
return True
|
||
|
||
def _find_base_subject(self, header_subject, base_subjects):
|
||
"""Find the base subject that corresponds to a header subject"""
|
||
header_lower = header_subject.lower()
|
||
|
||
# Check for direct matches first
|
||
for base_subject in base_subjects:
|
||
if base_subject.lower() in header_lower or header_lower in base_subject.lower():
|
||
return base_subject
|
||
|
||
# Check for partial matches with common patterns
|
||
for base_subject in base_subjects:
|
||
# Remove common suffixes from header subject and try to match
|
||
simplified_header = header_lower.replace(" 1 модуль", "").replace(" 2 модуль", "") \
|
||
.replace(" 1,2 модуль", "").replace(" 1 мод.", "").replace(" 2 мод.", "") \
|
||
.replace(" / ", " ").replace(" ", " ")
|
||
|
||
simplified_base = base_subject.lower().replace(" / ", " ").replace(" ", " ")
|
||
|
||
if simplified_base in simplified_header or simplified_header in simplified_base:
|
||
return base_subject
|
||
|
||
return None
|
||
|
||
def _is_likely_subject_name_simple(self, text):
|
||
"""Simple check if the text is likely a subject name"""
|
||
if not text or len(text.strip()) < 2:
|
||
return False
|
||
|
||
text = text.strip().lower()
|
||
|
||
# Common subject indicators in Russian and English
|
||
subject_indicators = [
|
||
'технотрек', 'матем', 'информ', 'англ', 'русск', 'физика', 'химия', 'биол', 'история',
|
||
'общество', 'география', 'литер', 'физкульт', 'лидерство',
|
||
'спорт. клуб', 'орксэ', 'китайск', 'немецк', 'француз', 'speaking club', 'maths',
|
||
'ict', 'geography', 'physics', 'robotics', 'culinary', 'science', 'ai core', 'vr/ar',
|
||
'cybersafety', 'business', 'design', 'prototype', 'mediacom', 'robotics track',
|
||
'culinary track', 'science track', 'ai core track', 'vr/ar track', 'cybersafety track',
|
||
'programming', 'algorithm', 'logic', 'pe', 'sports', 'swimming', 'fitness', 'gymnastics',
|
||
'climbing', 'games', 'art', 'music', 'dance', 'karate', 'judo', 'chess', 'leadership',
|
||
'алгоритмика', 'робототехника', 'программирование', 'математика', 'информатика', 'орксэ',
|
||
'английский', 'русский', 'физическая культура', 'орксэ', 'изо', 'алгебра', 'геометрия',
|
||
'астрономия', 'экология', 'астрономия', 'иностранный', 'ит', 'computer science', 'informatics'
|
||
]
|
||
|
||
# Check if text contains any of the subject indicators
|
||
for indicator in subject_indicators:
|
||
if indicator in text:
|
||
return True
|
||
|
||
return False
|
||
|
||
def _is_likely_subject_name(self, text):
|
||
"""Check if the text is likely a subject name"""
|
||
if not text or len(text.strip()) < 2:
|
||
return False
|
||
|
||
text = text.strip()
|
||
|
||
# Common subject indicators in Russian and English
|
||
subject_indicators = [
|
||
'Матем.', 'Информ.', 'Англ.яз', 'Русск.яз', 'Физика', 'Химия', 'Биол', 'История',
|
||
'Общество', 'География', 'Литер', 'Физкульт', 'Технотрек', 'Лидерство',
|
||
'Спорт. клуб', 'ОРКСЭ', 'Китайск', 'Немецк', 'Француз', 'Speaking club', 'Maths',
|
||
'ICT', 'Geography', 'Physics', 'Robotics', 'Culinary', 'Science', 'AI Core', 'VR/AR',
|
||
'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Science', 'Robotics',
|
||
'Culinary', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype',
|
||
'MediaCom', 'Robotics Track', 'Culinary Track', 'Science Track', 'AI Core Track',
|
||
'VR/AR Track', 'CyberSafety Track', 'Business Track', 'Design Track', 'Prototype Track',
|
||
'MediaCom Track', 'Math', 'Algebra', 'Geometry', 'Calculus', 'Statistics', 'Coding',
|
||
'Programming', 'Algorithm', 'Logic', 'Robotics', 'Physical Education', 'PE', 'Sports',
|
||
'Swimming', 'Fitness', 'Gymnastics', 'Climbing', 'Games', 'Art', 'Music', 'Dance',
|
||
'Karate', 'Judo', 'Martial Arts', 'Chess', 'Leadership', 'Entrepreneurship',
|
||
'Технотрек 1 модуль', 'Технотрек 2 модуль', 'ОРКСЭ 1,2 модуль', 'Математика 1 модуль',
|
||
'Математика 2 модуль', 'Программирование', 'Алгоритмика и логика', 'Лидерство',
|
||
'Робототехника', 'Physical Education 1,2 модуль', 'Английский 1 модуль', 'Английский 2 модуль',
|
||
'Англ.яз', 'Русск.яз', 'Информ.', 'Матем.', 'Физика', 'Химия', 'Биология', 'История',
|
||
'Обществознание', 'География', 'Литература', 'Физическая культура', 'ОРКСЭ', 'ИЗО',
|
||
'Китайский', 'Немецкий', 'Французский', 'Алгебра', 'Геометрия', 'Астрономия', 'Экология'
|
||
]
|
||
|
||
# Check if text matches any of the subject indicators
|
||
for indicator in subject_indicators:
|
||
if indicator.lower() in text.lower():
|
||
return True
|
||
|
||
# Check if the text contains common subject-related keywords
|
||
common_keywords = ['модуль', 'track', 'club', 'group', 'class', 'lesson', 'subject', 'module', 'яз', 'язык']
|
||
for keyword in common_keywords:
|
||
if keyword in text.lower():
|
||
return True
|
||
|
||
# Check if text contains specific patterns that indicate it's a subject
|
||
subject_patterns = [
|
||
r'.*[Tt]rack.*', # Track identifiers
|
||
r'.*[Mm]odule.*', # Module identifiers
|
||
r'.*[Cc]lub.*', # Club identifiers
|
||
r'.*[Ss]ubject.*', # Subject identifiers
|
||
r'.*[Cc]lass.*', # Class identifiers
|
||
r'.*[Ll]esson.*', # Lesson identifiers
|
||
]
|
||
|
||
for pattern in subject_patterns:
|
||
if re.search(pattern, text):
|
||
return True
|
||
|
||
return False
|
||
|
||
def _is_valid_student_record_by_cols(self, row, class_col_idx, name_col_idx):
|
||
"""Check if a row represents a valid student record based on specific columns"""
|
||
# A valid student record should have:
|
||
# - Non-empty class name in the class column
|
||
# - Non-empty student name in the name column
|
||
|
||
if len(row) <= max(class_col_idx, name_col_idx):
|
||
return False
|
||
|
||
class_name = row[class_col_idx].strip() if len(row) > class_col_idx else ""
|
||
student_name = row[name_col_idx].strip() if len(row) > name_col_idx else ""
|
||
|
||
# Check if the class name looks like an actual class (contains a number followed by a letter)
|
||
class_pattern = r'^\d+[А-ЯA-Z]$' # e.g., 6А, 11А, 4B
|
||
if re.match(class_pattern, class_name):
|
||
return bool(student_name and student_name != class_name) # Ensure name exists and is different from class
|
||
|
||
# If not matching class pattern, check if the name field is not just another class-like value
|
||
name_pattern = r'^\d+[А-ЯA-Z]$' # This would indicate it's probably a class, not a name
|
||
if re.match(name_pattern, student_name):
|
||
return False # This row has a class in the name field, so not valid
|
||
|
||
return bool(class_name and student_name and class_name != student_name)
|
||
|
||
def _process_schedule_entry_with_teacher_mapping(self, student_id, group_info, subject_info, teacher_name):
|
||
"""Process individual schedule entries with explicit teacher mapping and insert into normalized tables"""
|
||
# Clean up the inputs
|
||
subject_name = subject_info.strip() if subject_info.strip() else "General Class"
|
||
group_assignment = group_info.strip()
|
||
|
||
# Only proceed if we have valid data
|
||
if subject_name and group_assignment and group_assignment.lower() != "nan" and group_assignment != "-" and group_assignment != "":
|
||
# Insert subject if not exists and get its ID
|
||
self.cursor.execute("INSERT OR IGNORE INTO subjects (name) VALUES (?)", (subject_name,))
|
||
self.cursor.execute("SELECT subject_id FROM subjects WHERE name = ?", (subject_name,))
|
||
subject_id = self.cursor.fetchone()[0]
|
||
|
||
# Insert teacher if not exists and get its ID
|
||
# Use the teacher name as is, without default creation if not found
|
||
self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (teacher_name,))
|
||
self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (teacher_name,))
|
||
teacher_result = self.cursor.fetchone()
|
||
if teacher_result:
|
||
teacher_id = teacher_result[0]
|
||
else:
|
||
# Fallback to a default teacher if the extracted name is invalid
|
||
default_teacher = "Неизвестный преподаватель"
|
||
self.cursor.execute("INSERT OR IGNORE INTO teachers (name) VALUES (?)", (default_teacher,))
|
||
self.cursor.execute("SELECT teacher_id FROM teachers WHERE name = ?", (default_teacher,))
|
||
teacher_id = self.cursor.fetchone()[0]
|
||
|
||
# Use a default day for now (in a real system, we'd extract this from the schedule)
|
||
# For now, we'll randomly assign to a day of the week
|
||
import random
|
||
days_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
|
||
selected_day = random.choice(days_list)
|
||
self.cursor.execute("INSERT OR IGNORE INTO days (name) VALUES (?)", (selected_day,))
|
||
self.cursor.execute("SELECT day_id FROM days WHERE name = ?", (selected_day,))
|
||
day_id = self.cursor.fetchone()[0]
|
||
|
||
# Use a default period - for now we'll use period 1, but in a real system
|
||
# we would need to extract this from the CSV if available
|
||
self.cursor.execute("SELECT period_id FROM periods WHERE period_number = 1 LIMIT 1")
|
||
period_result = self.cursor.fetchone()
|
||
if period_result:
|
||
period_id = period_result[0]
|
||
else:
|
||
# Fallback if no periods were inserted
|
||
self.cursor.execute("SELECT period_id FROM periods LIMIT 1")
|
||
period_id = self.cursor.fetchone()[0]
|
||
|
||
# Clean the group name to separate it from student data
|
||
group_name = self._clean_group_name(group_assignment)
|
||
self.cursor.execute("INSERT OR IGNORE INTO groups (name) VALUES (?)", (group_name,))
|
||
self.cursor.execute("SELECT group_id FROM groups WHERE name = ?", (group_name,))
|
||
group_id = self.cursor.fetchone()[0]
|
||
|
||
# Insert the schedule entry
|
||
self.cursor.execute("""
|
||
INSERT OR IGNORE INTO schedule (student_id, subject_id, teacher_id, day_id, period_id, group_id)
|
||
VALUES (?, ?, ?, ?, ?, ?)
|
||
""", (student_id, subject_id, teacher_id, day_id, period_id, group_id))
|
||
|
||
def _clean_group_name(self, raw_group_data):
|
||
"""Extract clean group name from potentially mixed student/group data"""
|
||
# Remove potential student names from the group data
|
||
# Group names typically contain numbers, class identifiers, or specific activity names
|
||
cleaned = raw_group_data.strip()
|
||
|
||
# If the group data looks like it contains a student name pattern,
|
||
# we'll try to extract just the group identifier part
|
||
if re.match(r'^\d+[А-ЯA-Z]', cleaned):
|
||
# This looks like a class designation, return as is
|
||
return cleaned
|
||
|
||
# If the group data contains common group indicators, return as is
|
||
group_indicators = ['кл', 'class', 'club', 'track', 'group', 'module', '-']
|
||
if any(indicator in cleaned.lower() for indicator in group_indicators):
|
||
return cleaned
|
||
|
||
# If the group data looks like a subject-identifier pattern, return as is
|
||
subject_indicators = ['ICT', 'English', 'Math', 'Physics', 'Chemistry', 'Biology', 'Science']
|
||
if any(indicator in cleaned for indicator in subject_indicators):
|
||
return cleaned
|
||
|
||
# If none of the above conditions match, return a generic group name
|
||
return f"Group_{hash(cleaned) % 10000}"
|
||
|
||
def _is_likely_teacher_name(self, text):
|
||
"""Check if the text is likely to be a teacher name"""
|
||
if not text or len(text.strip()) < 5: # Require minimum length for a name
|
||
return False
|
||
|
||
text = text.strip()
|
||
|
||
# Common non-name values that appear in the CSV
|
||
common_non_names = ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '', 'Е4 Е5', 'E4 E5', 'группа', 'group', 'каб.', 'гр.', 'фитнес', 'каб', 'все группы', '1 группа', '2 группа', 'Е1', 'Е2', 'Е3', 'Е4', 'Е5', 'Е6', 'Е1 Е2', 'Е4 Е5', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'гр 1', 'гр 2']
|
||
if text.lower() in common_non_names:
|
||
return False
|
||
|
||
# Exclusion patterns for non-teacher entries
|
||
exclusion_patterns = [
|
||
r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$', # E4 E5 pattern
|
||
r'^[A-Z]\d+\s+[A-Z]\d+$', # English groups
|
||
r'.*[Tt]rack.*', # Track identifiers
|
||
r'.*[Gg]roup.*', # Group identifiers
|
||
r'.*\d+[А-ЯA-Z]\d*$', # Number-letter combos
|
||
r'^[А-ЯЁA-Z].*\d+', # Text ending with digits
|
||
r'.*[Cc]lub.*', # Club identifiers
|
||
r'.*[Rr]oom.*', # Room identifiers
|
||
r'.*[Cc]lass.*', # Class identifiers
|
||
r'.*[Pp]eriod.*', # Period identifiers
|
||
r'^\d+$', # Just numbers
|
||
r'^[А-ЯЁA-Z]*$', # All caps words
|
||
r'^[А-ЯЁA-Z\s\d]+$', # Caps words and numbers (likely room numbers)
|
||
r'^[ЕеEe][\d\s,]+$' # Room identifiers like E1, E2, etc.
|
||
]
|
||
|
||
for pattern in exclusion_patterns:
|
||
if re.match(pattern, text, re.IGNORECASE):
|
||
return False
|
||
|
||
# Positive patterns for teacher names
|
||
teacher_patterns = [
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.\s*[А-ЯЁ]\.$', # Иванов А.А.
|
||
r'^[А-ЯЁ]\.\s*[А-ЯЁ]\.\s+[А-ЯЁ][а-яё]+$', # А.А. Иванов
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$', # Full name
|
||
r'^[A-Z][a-z]+\s+[A-Z][a-z]+$', # John Smith
|
||
r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$', # Smith J.J.
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$', # Russian names without patronymic
|
||
r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$', # Initials format
|
||
r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+', # Names without periods
|
||
]
|
||
|
||
for pattern in teacher_patterns:
|
||
if re.match(pattern, text.strip()):
|
||
return True
|
||
|
||
# Additional check: if it looks like a proper name (with capital letters and min length)
|
||
# and doesn't match exclusion patterns
|
||
name_parts = text.split()
|
||
if len(name_parts) >= 2:
|
||
# At least two parts (first name + last name)
|
||
# Check if they start with capital letters
|
||
if all(part[0].isupper() for part in name_parts if len(part) > 1):
|
||
# Additional check: make sure it's not just a title or other text
|
||
common_titles = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Teacher', 'Instructor', 'Coach']
|
||
if any(title in text for title in common_titles):
|
||
return False
|
||
return True
|
||
|
||
return False
|
||
|
||
def _is_likely_subject_label(self, text):
|
||
"""Check if text is likely a subject label like 'Матем.', 'Информ.', 'Англ.яз', etc."""
|
||
if not text or len(text) < 2:
|
||
return False
|
||
|
||
# Common Russian abbreviations for subjects
|
||
subject_patterns = [
|
||
'Матем.', 'Информ.', 'Англ.яз', 'Русск.яз', 'Физика', 'Химия', 'Биол', 'История',
|
||
'Общество', 'География', 'Литер', 'Физкульт', 'Технотрек', 'Лидерство',
|
||
'Спорт. клуб', 'ОРКСЭ', 'Китайск', 'Немецк', 'Француз', 'Speaking club', 'Maths',
|
||
'ICT', 'Geography', 'Physics', 'Robotics', 'Culinary', 'Science', 'AI Core', 'VR/AR',
|
||
'CyberSafety', 'Business', 'Design', 'Prototype', 'MediaCom', 'Science', 'Robotics',
|
||
'Culinary', 'AI Core', 'VR/AR', 'CyberSafety', 'Business', 'Design', 'Prototype',
|
||
'MediaCom', 'Robotics Track', 'Culinary Track', 'Science Track', 'AI Core Track',
|
||
'VR/AR Track', 'CyberSafety Track', 'Business Track', 'Design Track', 'Prototype Track',
|
||
'MediaCom Track', 'Math', 'Algebra', 'Geometry', 'Calculus', 'Statistics', 'Coding',
|
||
'Programming', 'Algorithm', 'Logic', 'Robotics', 'Physical Education', 'PE', 'Sports',
|
||
'Swimming', 'Fitness', 'Gymnastics', 'Climbing', 'Games', 'Art', 'Music', 'Dance',
|
||
'Karate', 'Judo', 'Martial Arts', 'Chess', 'Leadership', 'Entrepreneurship'
|
||
]
|
||
|
||
text_clean = text.strip().lower()
|
||
for pattern in subject_patterns:
|
||
if pattern.lower() in text_clean:
|
||
return True
|
||
|
||
# Also check for specific subject names found in the data
|
||
specific_subjects = ['матем.', 'информ.', 'англ.яз', 'русск.яз', 'каб.', 'business', 'maths',
|
||
'speaking', 'ict', 'geography', 'physics', 'robotics', 'science', 'ai core',
|
||
'vr/ar', 'cybersafety', 'design', 'prototype', 'mediacom', 'culinary',
|
||
'physical education', 'pe', 'sports', 'swimming', 'fitness', 'gymnastics',
|
||
'climbing', 'games', 'art', 'music', 'dance', 'karate', 'chess', 'leadership']
|
||
for subj in specific_subjects:
|
||
if subj in text_clean:
|
||
return True
|
||
|
||
return False
|
||
|
||
def _find_matching_subject_in_header_from_list(self, subject_label, header_subjects, header_row):
|
||
"""Find the matching full subject name in the header based on the label"""
|
||
if not subject_label:
|
||
return None
|
||
|
||
# Look for the best match in the header subjects
|
||
subject_label_lower = subject_label.lower().replace('.', '').replace('яз', 'язык')
|
||
|
||
# Direct match first
|
||
for col_idx, full_subj in header_subjects:
|
||
if subject_label_lower in full_subj.lower() or full_subj.lower() in subject_label_lower:
|
||
return full_subj
|
||
|
||
# If no direct match, try to find by partial matching in the whole header row
|
||
for i, header_item in enumerate(header_row):
|
||
if subject_label_lower in str(header_item).lower() or str(header_item).lower() in subject_label_lower:
|
||
return str(header_item).strip()
|
||
|
||
# Try more general matching - if label contains common abbreviations
|
||
for col_idx, full_subj in header_subjects:
|
||
full_lower = full_subj.lower()
|
||
if ('матем' in subject_label_lower and 'матем' in full_lower) or \
|
||
('информ' in subject_label_lower and 'информ' in full_lower) or \
|
||
('англ' in subject_label_lower and 'англ' in full_lower) or \
|
||
('русск' in subject_label_lower and 'русск' in full_lower) or \
|
||
('физик' in subject_label_lower and 'физик' in full_lower) or \
|
||
('хим' in subject_label_lower and 'хим' in full_lower) or \
|
||
('биол' in subject_label_lower and 'биол' in full_lower) or \
|
||
('истор' in subject_label_lower and 'истор' in full_lower) or \
|
||
('общ' in subject_label_lower and 'общ' in full_lower) or \
|
||
('географ' in subject_label_lower and 'географ' in full_lower):
|
||
return full_subj
|
||
|
||
return None
|
||
|
||
def find_student(self, name_query):
    """Look up students whose full name contains *name_query*.

    Returns a list of at most ten ``(full_name, class_name)`` rows.
    """
    like_pattern = f'%{name_query}%'
    query = """
        SELECT s.full_name, s.class_name
        FROM students s
        WHERE s.full_name LIKE ?
        LIMIT 10
    """
    cur = self.cursor
    cur.execute(query, (like_pattern,))
    return cur.fetchall()
|
||
|
||
def get_current_class(self, student_name, current_day, current_time):
    """Return the lesson a student is attending at the given day and time.

    The schedule is joined with students, subjects, teachers, days, periods
    and groups, then filtered to the period whose start/end times enclose
    *current_time* on *current_day*.

    Returns a single ``(subject, teacher, start_time, end_time)`` row, or
    ``None`` when no scheduled period matches.
    """
    query = """
        SELECT sub.name, t.name, p.start_time, p.end_time
        FROM schedule sch
        JOIN students s ON sch.student_id = s.student_id
        JOIN subjects sub ON sch.subject_id = sub.subject_id
        JOIN teachers t ON sch.teacher_id = t.teacher_id
        JOIN days d ON sch.day_id = d.day_id
        JOIN periods p ON sch.period_id = p.period_id
        JOIN groups g ON sch.group_id = g.group_id
        WHERE s.full_name = ?
        AND d.name = ?
        AND p.start_time <= ?
        AND p.end_time >= ?
    """
    # current_time is bound twice: once for each side of the time window.
    params = (student_name, current_day, current_time, current_time)
    cur = self.cursor
    cur.execute(query, params)
    return cur.fetchone()
|
||
|
||
def get_student_schedule(self, student_name):
    """Return every scheduled lesson for the named student.

    Rows are ``(subject, teacher, start_time, end_time, group)`` tuples
    ordered by the period number.
    """
    query = """
        SELECT sub.name, t.name, p.start_time, p.end_time, g.name
        FROM schedule sch
        JOIN students s ON sch.student_id = s.student_id
        JOIN subjects sub ON sch.subject_id = sub.subject_id
        JOIN teachers t ON sch.teacher_id = t.teacher_id
        JOIN periods p ON sch.period_id = p.period_id
        JOIN groups g ON sch.group_id = g.group_id
        WHERE s.full_name = ?
        ORDER BY p.period_number
    """
    cur = self.cursor
    cur.execute(query, (student_name,))
    return cur.fetchall()
|
||
|
||
def _is_likely_teacher_name_enhanced(self, text):
|
||
"""Enhanced check if the text is likely to be a teacher name"""
|
||
if not text or len(text.strip()) < 5: # Require minimum length for a name
|
||
return False
|
||
|
||
text = text.strip()
|
||
|
||
# Common non-name values that appear in the CSV
|
||
common_non_names = ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '', 'Е4 Е5', 'E4 E5', 'группа', 'group', 'каб.', 'гр.', 'фитнес', 'каб', 'все группы', '1 группа', '2 группа', 'Е1', 'Е2', 'Е3', 'Е4', 'Е5', 'Е6', 'Е1 Е2', 'Е4 Е5', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'гр 1', 'гр 2']
|
||
if text.lower() in common_non_names:
|
||
return False
|
||
|
||
# Exclusion patterns for non-teacher entries
|
||
exclusion_patterns = [
|
||
r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$', # E4 E5 pattern
|
||
r'^[A-Z]\d+\s+[A-Z]\d+$', # English groups
|
||
r'.*[Tt]rack.*', # Track identifiers
|
||
r'.*[Gg]roup.*', # Group identifiers
|
||
r'.*\d+[А-ЯA-Z]\d*$', # Number-letter combos
|
||
r'^[А-ЯЁA-Z].*\d+', # Text ending with digits
|
||
r'.*[Cc]lub.*', # Club identifiers
|
||
r'.*[Rr]oom.*', # Room identifiers
|
||
r'.*[Cc]lass.*', # Class identifiers
|
||
r'.*[Pp]eriod.*', # Period identifiers
|
||
r'^\d+$', # Just numbers
|
||
r'^[А-ЯЁA-Z]*$', # All caps words
|
||
r'^[А-ЯЁA-Z\s\d]+$', # Caps words and numbers (likely room numbers)
|
||
r'^[ЕеEe][\d\s,]+$', # Room identifiers like E1, E2, etc.
|
||
]
|
||
|
||
for pattern in exclusion_patterns:
|
||
if re.match(pattern, text, re.IGNORECASE):
|
||
return False
|
||
|
||
# Check if it looks like a name with multiple capitalized words (Russian or English)
|
||
# Teacher names typically have 2-4 words with capitalized first letters
|
||
words = text.split()
|
||
if len(words) < 2 or len(words) > 4:
|
||
return False
|
||
|
||
# Check if most words start with capital letters (allowing for exceptions like "van", "de", etc.)
|
||
capital_words = 0
|
||
for word in words:
|
||
# Skip common particles that are lowercase in names
|
||
if word in ['van', 'von', 'de', 'di', 'le', 'la', 'du', 'del', 'da', 'и', 'на', 'де']:
|
||
capital_words += 1
|
||
elif word[0].isupper() and len(word) > 1:
|
||
capital_words += 1
|
||
|
||
# At least n-1 words should be capitalized (for n-word names)
|
||
if capital_words < len(words) - 1:
|
||
return False
|
||
|
||
# Additional check: if it looks like a proper name (with capital letters and min length)
|
||
# and doesn't match exclusion patterns
|
||
name_parts = text.split()
|
||
if len(name_parts) >= 2:
|
||
# At least two parts (first name + last name)
|
||
# Check if they start with capital letters
|
||
if all(part[0].isupper() for part in name_parts if len(part) > 1):
|
||
# Additional check: make sure it's not just a title or other text
|
||
common_titles = ['Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Teacher', 'Instructor', 'Coach']
|
||
if any(title in text for title in common_titles):
|
||
return False
|
||
return True
|
||
|
||
return False
|
||
|
||
|
||
def _is_likely_group_identifier(self, text):
|
||
"""Check if text is likely a group identifier like 'E1', 'E2', 'гр 1', etc."""
|
||
if not text:
|
||
return False
|
||
|
||
text = text.strip()
|
||
|
||
# Common group identifiers
|
||
group_patterns = [
|
||
r'^[Ee]\d+', # E1, E2, etc.
|
||
r'^[Ee]\d+\s*[Ee]\d+', # E1 E2, E4 E5, etc.
|
||
r'^(гр|group|группа).*', # "гр 1", "group 1", etc.
|
||
r'^[А-ЯA-Z]\d+', # A1, B2, etc.
|
||
r'^[А-ЯA-Z]\d+\s+[А-ЯA-Z]\d+', # A1 B2, etc.
|
||
r'^(все группы|all groups).*', # "все группы", etc.
|
||
r'^\d+\s*(группа|class).*', # "1 группа", etc.
|
||
r'^(1|2)\s*(группа|group)', # "1 группа", "2 group", etc.
|
||
]
|
||
|
||
for pattern in group_patterns:
|
||
if re.match(pattern, text, re.IGNORECASE):
|
||
return True
|
||
|
||
# Additional common group indicators
|
||
common_groups = ['E1 E2', 'E3 E4', 'E5 E6', 'E1', 'E2', 'E3', 'E4', 'E5', 'E6',
|
||
'1 группа', '2 группа', 'все группы', 'гр 1', 'гр 2', 'all groups',
|
||
'group 1', 'group 2', 'A1', 'B1', 'C1', '4A', '4B', '4C', '4ABC']
|
||
|
||
return text in common_groups
|
||
|
||
def update_teachers_from_cheat_sheet(self, teachers_csv_path):
    """Update the teachers table from the Teachers.csv file.

    Every cell of the file is inspected.  A cell holding several names
    separated by '/' (like "Name1 / Name2") is split and each part is
    checked on its own; the combined cell itself is never stored.  Accepted
    names are inserted with ``INSERT OR IGNORE`` (email and phone left
    NULL), the number of unique names found is printed, and the
    transaction is committed.

    Args:
        teachers_csv_path: Path to the CSV file (UTF-8, with or without BOM).
    """
    # 'utf-8-sig' drops a leading BOM so it cannot end up inside the first
    # cell; newline='' is what the csv module expects from its input file.
    with open(teachers_csv_path, 'r', encoding='utf-8-sig', newline='') as file:
        rows = list(csv.reader(file))

    # Set to store unique teacher names
    unique_teachers = set()

    for row in rows:
        for cell in row:
            # split('/') yields the cell itself when there is no separator
            for candidate in cell.split('/'):
                candidate = candidate.strip()
                # Check if the candidate looks like a teacher name
                if candidate and self._is_likely_teacher_name_enhanced(candidate):
                    unique_teachers.add(candidate)

    # Insert unique teachers into the database
    self.cursor.executemany(
        "INSERT OR IGNORE INTO teachers (name, email, phone) VALUES (?, NULL, NULL)",
        [(teacher_name,) for teacher_name in unique_teachers],
    )

    print(f"Added {len(unique_teachers)} unique teachers from Teachers.csv")
    self.conn.commit()
|
||
|
||
|
||
# Main execution - just setup database
if __name__ == "__main__":
    db = SchoolScheduleDB()
    try:
        # Check if auto-update flag is passed as argument
        auto_update = len(sys.argv) > 1 and sys.argv[1] == '--auto'
        db.update_database_from_csv(auto_update=auto_update)
    finally:
        # Always call close(), even when the update raises
        db.close()
|
||
|