#!/usr/bin/env python
"""
database.py - School schedule database (normalized version)

Creates normalized tables and populates them from CSV files with proper relationships.
"""

import csv
import os
import random
import re
import sqlite3
import sys
import zlib


class SchoolScheduleDB:
    """SQLite-backed, normalized store for a school schedule imported from CSV.

    The schema consists of lookup tables (``teachers``, ``subjects``, ``days``,
    ``periods``, ``groups``), a ``students`` table and a ``schedule`` table
    that links all of them through foreign keys.

    The instance owns one connection (``self.conn``) and one cursor
    (``self.cursor``).  It can be used as a context manager; the connection is
    closed on exit.
    """

    # Lookup tables keyed by a UNIQUE ``name`` column -> their primary key.
    # Only these identifiers are ever interpolated into SQL text.
    _LOOKUP_TABLES = {
        'subjects': 'subject_id',
        'teachers': 'teacher_id',
        'days': 'day_id',
        'groups': 'group_id',
    }

    # (period_number, start_time, end_time)
    _PERIOD_TIMES = (
        (1, '09:00', '09:40'),
        (2, '10:00', '10:40'),
        (3, '11:00', '11:40'),
        (4, '11:50', '12:30'),
        (5, '12:40', '13:20'),
        (6, '13:30', '14:10'),
        (7, '14:20', '15:00'),
        (8, '15:20', '16:00'),
        (9, '16:15', '16:55'),
        (10, '17:05', '17:45'),
        (11, '17:55', '18:35'),
        (12, '18:45', '19:20'),
        (13, '19:20', '20:00'),
    )
    _DAYS_OF_WEEK = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                     "Saturday", "Sunday")
    _WEEKDAYS = _DAYS_OF_WEEK[:5]

    # Substrings (compared against lower-cased header cells) used to locate
    # the header row and the class / student-name columns.
    _FIO_MARKERS = ('фио', 'ф.и.о')
    _CLASS_INDICATORS = ('класс', 'class')
    _NAME_INDICATORS = ('имя', 'name', 'фамилия', 'surname')
    _NAME_COLUMN_MARKERS = _FIO_MARKERS + _NAME_INDICATORS
    _DEPARTMENT_KEYWORDS = ('учитель', 'teacher', 'кафедра', 'department')

    # Header cells that are never subjects (exact match / substring match).
    _NON_SUBJECT_HEADERS = frozenset(
        ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера'])
    _NON_SUBJECT_FRAGMENTS = ('ф.и.о', 'сортировка', 'номер')
    # Header substrings marking columns that carry no schedule information.
    _NON_SCHEDULE_FRAGMENTS = ('сортировка', 'номер', 'шкафчика', 'локера')

    _CLASS_NAME_RE = re.compile(r'^\d+[А-ЯA-Z]$')   # e.g. 6А, 11А, 4B
    _CLASS_PREFIX_RE = re.compile(r'^\d+[А-ЯA-Z]')

    # Values compared against ``text.lower()``, hence all lower-case.
    _COMMON_NON_NAMES = frozenset(
        ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '',
         'е4 е5', 'e4 e5', 'группа', 'group'])
    # Matched case-insensitively; any hit means "not a teacher name".
    _TEACHER_EXCLUSIONS = tuple(re.compile(p, re.IGNORECASE) for p in (
        r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$',   # E4 E5 pattern
        r'^[A-Z]\d+\s+[A-Z]\d+$',      # English groups
        r'.*[Tt]rack.*',               # Track identifiers
        r'.*[Gg]roup.*',               # Group identifiers
        r'.*\d+[А-ЯA-Z]\d*$',          # Number-letter combos
        r'^[А-ЯЁA-Z].*\d+',            # Starts with a letter, contains digits
        r'.*[Cc]lub.*',                # Club identifiers
    ))
    # Matched case-sensitively; any hit means "teacher name".
    _TEACHER_PATTERNS = tuple(re.compile(p) for p in (
        r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.\s*[А-ЯЁ]\.$',            # Иванов А.А.
        r'^[А-ЯЁ]\.\s*[А-ЯЁ]\.\s+[А-ЯЁ][а-яё]+$',            # А.А. Иванов
        r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$',  # Full name
        r'^[A-Z][a-z]+\s+[A-Z][a-z]+$',                       # John Smith
        r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$',                 # Smith J.J.
        r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$',                  # No patronymic
    ))

    # Lower-case substrings that mark a text as a subject label.  Longer
    # variants such as "robotics track" are covered by their shorter stem.
    _SUBJECT_FRAGMENTS = (
        'матем.', 'информ.', 'англ.яз', 'русск.яз', 'каб.', 'физика', 'химия',
        'биол', 'история', 'общество', 'география', 'литер', 'физкульт',
        'технотрек', 'лидерство', 'спорт. клуб', 'орксэ', 'китайск', 'немецк',
        'француз', 'speaking', 'math', 'ict', 'geography', 'physics',
        'robotics', 'culinary', 'science', 'ai core', 'vr/ar', 'cybersafety',
        'business', 'design', 'prototype', 'mediacom', 'algebra', 'geometry',
        'calculus', 'statistics', 'coding', 'programming', 'algorithm',
        'logic', 'physical education', 'pe', 'sports', 'swimming', 'fitness',
        'gymnastics', 'climbing', 'games', 'art', 'music', 'dance', 'karate',
        'judo', 'chess', 'leadership', 'entrepreneurship',
    )
    # Stems tried as a last resort when matching a label to a header subject.
    _SUBJECT_STEMS = ('матем', 'информ', 'англ', 'русск', 'физик', 'хим',
                      'биол', 'истор', 'общ', 'географ')

    def __init__(self, db_name='school_schedule.db'):
        """Open (or create) the SQLite database and make sure the schema exists.

        Args:
            db_name: Path passed to ``sqlite3.connect``.
        """
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        # Initialize database tables
        self.create_tables()

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block."""
        self.close()
        return False

    def create_tables(self):
        """Create normalized tables with proper relationships"""
        statements = (
            # Teachers table
            """
            CREATE TABLE IF NOT EXISTS teachers (
                teacher_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                email TEXT,
                phone TEXT
            )
            """,
            # Subjects table
            """
            CREATE TABLE IF NOT EXISTS subjects (
                subject_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT
            )
            """,
            # Days table
            """
            CREATE TABLE IF NOT EXISTS days (
                day_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL  -- e.g., Monday, Tuesday, etc.
            )
            """,
            # Periods table - with proper unique constraint
            """
            CREATE TABLE IF NOT EXISTS periods (
                period_id INTEGER PRIMARY KEY AUTOINCREMENT,
                period_number INTEGER,
                start_time TEXT,
                end_time TEXT,
                UNIQUE(period_number, start_time, end_time)
            )
            """,
            # Groups table
            """
            CREATE TABLE IF NOT EXISTS groups (
                group_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                class_name TEXT
            )
            """,
            # Students table
            """
            CREATE TABLE IF NOT EXISTS students (
                student_id INTEGER PRIMARY KEY AUTOINCREMENT,
                class_name TEXT,
                full_name TEXT NOT NULL
            )
            """,
            # Schedule table with foreign key relationships
            """
            CREATE TABLE IF NOT EXISTS schedule (
                entry_id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_id INTEGER,
                subject_id INTEGER,
                teacher_id INTEGER,
                day_id INTEGER,
                period_id INTEGER,
                group_id INTEGER,
                FOREIGN KEY (student_id) REFERENCES students(student_id),
                FOREIGN KEY (subject_id) REFERENCES subjects(subject_id),
                FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id),
                FOREIGN KEY (day_id) REFERENCES days(day_id),
                FOREIGN KEY (period_id) REFERENCES periods(period_id),
                FOREIGN KEY (group_id) REFERENCES groups(group_id)
            )
            """,
        )
        for ddl in statements:
            self.cursor.execute(ddl)
        self.conn.commit()

    def populate_periods_table(self):
        """Insert the standard periods and the days of the week, then commit.

        Existing rows are left untouched (``INSERT OR IGNORE`` against the
        UNIQUE constraints), so the call is safe to repeat.
        """
        self.cursor.executemany(
            "INSERT OR IGNORE INTO periods (period_number, start_time, end_time) VALUES (?, ?, ?)",
            self._PERIOD_TIMES,
        )
        # Add days of the week
        self.cursor.executemany(
            "INSERT OR IGNORE INTO days (name) VALUES (?)",
            [(day,) for day in self._DAYS_OF_WEEK],
        )
        self.conn.commit()

    def update_database_from_csv(self, auto_update=True, sample_data_dir="sample_data"):
        """Import student-data CSV files from a directory into the database.

        Files whose name contains ``first_sheet``, ``last_sheet`` or
        ``template`` are ignored.

        Args:
            auto_update: When true, import every candidate file without
                asking.  When false, prompt on stdin for confirmation and for
                the files to import.
            sample_data_dir: Directory scanned for ``.csv`` files.
        """
        if not os.path.isdir(sample_data_dir):
            print(f"Directory '{sample_data_dir}' not found.")
            return

        # Sorted so the numbered menu is stable between runs (os.listdir
        # order is arbitrary).
        excluded_markers = ('first_sheet', 'last_sheet', 'template')
        csv_files = sorted(
            f for f in os.listdir(sample_data_dir)
            if f.endswith('.csv') and not any(m in f for m in excluded_markers)
        )

        if not csv_files:
            print(f"No student data CSV files found in '{sample_data_dir}' directory.")
            return

        print(f"Found {len(csv_files)} student data CSV file(s):")
        for i, filename in enumerate(csv_files, 1):
            print(f"  {i}. {filename}")

        if auto_update:
            print("\nAuto-updating database with all student data CSV files...")
            files_to_update = csv_files
        else:
            files_to_update = self._prompt_for_files(csv_files)
            if files_to_update is None:
                return

        # Populate the periods and days tables first
        self.populate_periods_table()

        print(f"\nUpdating database with {len(files_to_update)} file(s):")
        imported = 0
        for filename in files_to_update:
            print(f"  - {filename}")
            csv_path = os.path.join(sample_data_dir, filename)
            print(f"Processing {csv_path}...")
            if self.process_csv_with_teacher_mapping(csv_path):
                imported += 1

        # Report what actually happened instead of claiming success when
        # every file was skipped.
        print(f"Database update finished: {imported} of {len(files_to_update)} file(s) imported.")

    def _prompt_for_files(self, csv_files):
        """Ask the user which files to import; return the list or ``None`` to abort."""
        response = input("\nUpdate database with CSV files? (yes/no): ").strip().lower()
        if response not in ('yes', 'y', 'да'):
            print("Skipping database update.")
            return None

        print("\n0. Update all files")
        selection = input("\nSelect file(s) to update (0 for all, or comma-separated numbers like 1,2,3): ")
        if selection.strip() == '0':
            return csv_files

        try:
            indices = [int(x.strip()) - 1 for x in selection.split(',')]
        except ValueError:
            print("Invalid input. Please enter numbers separated by commas or '0' for all files.")
            return None

        chosen = [csv_files[i] for i in indices if 0 <= i < len(csv_files)]
        if not chosen:
            print("No valid selections made.")
            return None
        return chosen

    def process_csv_with_teacher_mapping(self, csv_file):
        """Import one student-distribution CSV file.

        The header row is the first row containing a "ФИО"-like cell (or,
        failing that, a row with both a class and a name indicator).  Rows
        above it are scanned for teacher names, which are mapped to subject
        columns by position; rows below it are imported as students, one
        schedule entry per non-empty subject cell.

        Args:
            csv_file: Path of the CSV file (UTF-8, with or without BOM).

        Returns:
            ``True`` when the file was recognised and its rows committed,
            ``False`` when the file is missing or is not student data.
        """
        if not os.path.exists(csv_file):
            return False

        # newline='' is what the csv module requires for correct handling of
        # quoted line breaks; utf-8-sig transparently drops an Excel BOM.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
            rows = list(csv.reader(file))

        header_idx = self._find_header_index(rows)
        if header_idx is None:
            print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
            return False

        header_row = rows[header_idx]
        # Column positions depend only on the header, so resolve them once.
        class_col_idx = self._find_column(header_row, self._CLASS_INDICATORS)
        name_col_idx = self._find_column(header_row, self._NAME_COLUMN_MARKERS)
        if class_col_idx is None or name_col_idx is None:
            print(f"Skipping {csv_file} - could not locate both the class and the name column in the header")
            return False

        header_subjects = self._collect_header_subjects(header_row)
        teacher_subject_map = self._build_teacher_subject_map(rows, header_idx, header_subjects)

        # Process each student row
        for student_row in rows[header_idx + 1:]:
            self._import_student_row(
                student_row, header_row, class_col_idx, name_col_idx, teacher_subject_map
            )

        self.conn.commit()
        return True

    @staticmethod
    def _contains_any(cell, fragments):
        """Return True if the lower-cased text of *cell* contains any fragment."""
        text = str(cell).lower()
        return any(fragment in text for fragment in fragments)

    def _find_column(self, header_row, fragments):
        """Return the index of the first header cell containing a fragment, else None."""
        for idx, header in enumerate(header_row):
            if self._contains_any(header, fragments):
                return idx
        return None

    def _find_header_index(self, rows):
        """Locate the header row; return its index or ``None``."""
        # Preferred: the first row with a "ФИО" / "Ф.И.О." style cell.
        for idx, row in enumerate(rows):
            if self._find_column(row, self._FIO_MARKERS) is not None:
                return idx

        # Fallback: only if the first 10 rows mention both a class and a name
        # indicator, take the first row that has both.
        preview = rows[:10]
        has_class = any(self._find_column(r, self._CLASS_INDICATORS) is not None for r in preview)
        has_name = any(self._find_column(r, self._NAME_INDICATORS) is not None for r in preview)
        if not (has_class and has_name):
            return None
        for idx, row in enumerate(rows):
            if (self._find_column(row, self._CLASS_INDICATORS) is not None
                    and self._find_column(row, self._NAME_INDICATORS) is not None):
                return idx
        return None

    def _collect_header_subjects(self, header_row):
        """Map column index -> subject name for every header cell that is a subject."""
        subjects = {}
        for col_idx, raw in enumerate(header_row):
            subject_name = str(raw).strip()
            lowered = subject_name.lower()
            if (subject_name
                    and lowered not in self._NON_SUBJECT_HEADERS
                    and not any(f in lowered for f in self._NON_SUBJECT_FRAGMENTS)
                    and "№" not in subject_name):
                subjects[col_idx] = subject_name
        return subjects

    def _teacher_candidates(self, cell_text):
        """Return the teacher names found in one cell.

        The whole cell is tried first; only if that fails is the cell split
        on real or escaped newlines and each part tested on its own.
        """
        if self._is_likely_teacher_name(cell_text):
            return [cell_text]
        if '\n' in cell_text or '\\n' in cell_text:
            parts = (p.strip() for p in cell_text.replace('\\n', '\n').split('\n'))
            return [p for p in parts if p and self._is_likely_teacher_name(p)]
        return []

    def _match_subject_for_cell(self, row, col, header_subjects):
        """Pick the subject a teacher cell at (*row*, *col*) belongs to.

        Priority: the column to the right when the right neighbour is
        non-empty and that column is a subject; then, when the left neighbour
        names a teacher/department, the right column or the cell's own
        column; otherwise the cell's own column.  Returns ``None`` if no
        subject column applies.
        """
        left = str(row[col - 1]).strip() if col > 0 else ""
        right = str(row[col + 1]).strip() if col + 1 < len(row) else ""
        right_subject = header_subjects.get(col + 1)
        own_subject = header_subjects.get(col)

        if right and right_subject:
            return right_subject
        if left and self._contains_any(left, self._DEPARTMENT_KEYWORDS):
            return right_subject or own_subject
        return own_subject

    def _build_teacher_subject_map(self, rows, header_idx, header_subjects):
        """Build ``{subject name: teacher name}`` from the rows above the header.

        The first teacher found for a subject wins.
        """
        mapping = {}

        # Pass 1: first 15 rows of the file (never past the header), using
        # the neighbouring cells as context.
        for row in rows[:min(15, header_idx)]:
            for col, cell in enumerate(row):
                for teacher in self._teacher_candidates(str(cell).strip()):
                    subject = self._match_subject_for_cell(row, col, header_subjects)
                    if subject:
                        mapping.setdefault(subject, teacher)

        # Pass 2: the 5 rows right above the header, purely positional -
        # a teacher name sitting in a subject column.
        for row in rows[max(0, header_idx - 5):header_idx]:
            for col, cell in enumerate(row):
                text = str(cell).strip()
                if col in header_subjects and self._is_likely_teacher_name(text):
                    mapping.setdefault(header_subjects[col], text)

        return mapping

    def _get_or_create_student(self, class_name, full_name):
        """Return the id of the student, inserting the row only if it is new.

        ``students`` has no UNIQUE constraint, so ``INSERT OR IGNORE`` would
        add a duplicate on every import; look the row up first instead.
        """
        self.cursor.execute(
            "SELECT student_id FROM students WHERE full_name = ? AND class_name = ? "
            "ORDER BY student_id LIMIT 1",
            (full_name, class_name),
        )
        row = self.cursor.fetchone()
        if row is not None:
            return row[0]
        self.cursor.execute(
            "INSERT INTO students (class_name, full_name) VALUES (?, ?)",
            (class_name, full_name),
        )
        return self.cursor.lastrowid

    def _import_student_row(self, student_row, header_row, class_col_idx, name_col_idx,
                            teacher_subject_map):
        """Store one student row and a schedule entry per filled subject cell."""
        if not self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx):
            return
        class_name = student_row[class_col_idx].strip()
        name = student_row[name_col_idx].strip()
        if not (class_name and name):
            return

        student_id = self._get_or_create_student(class_name, name)

        # The first three columns plus the class/name columns are metadata.
        metadata_cols = {0, 1, 2, class_col_idx, name_col_idx}
        for col_idx, cell_value in enumerate(student_row[:len(header_row)]):
            if not cell_value or col_idx in metadata_cols:
                continue
            subject_header = header_row[col_idx]
            if self._contains_any(subject_header, self._NON_SCHEDULE_FRAGMENTS):
                continue

            # The cell content is the group the student attends for the subject.
            group_assignment = cell_value.strip()
            if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
                continue

            subject_name = str(subject_header).strip()
            teacher_name = teacher_subject_map.get(
                subject_name, f"Default Teacher for {subject_name}")
            self._process_schedule_entry_with_teacher_mapping(
                student_id, group_assignment, subject_name, teacher_name
            )

    def _is_valid_student_record_by_cols(self, row, class_col_idx, name_col_idx):
        """Check if a row represents a valid student record based on specific columns.

        A valid record has a non-empty class and a non-empty, different
        student name, and the name itself must not look like a class
        designation (digits followed by one capital letter).
        """
        if len(row) <= max(class_col_idx, name_col_idx):
            return False

        class_name = row[class_col_idx].strip()
        student_name = row[name_col_idx].strip()

        if self._CLASS_NAME_RE.match(class_name):
            return bool(student_name and student_name != class_name)

        # A class-like value in the name field means the row is not a student.
        if self._CLASS_NAME_RE.match(student_name):
            return False

        return bool(class_name and student_name and class_name != student_name)

    def _get_or_create_lookup_id(self, table, name):
        """Return the id of *name* in a lookup table, inserting it if needed.

        *table* must be a key of ``_LOOKUP_TABLES``; identifiers cannot be
        bound as SQL parameters, so they are restricted to that whitelist.
        Returns ``None`` when the row cannot be stored (e.g. *name* is None).
        """
        id_column = self._LOOKUP_TABLES[table]
        self.cursor.execute(f"INSERT OR IGNORE INTO {table} (name) VALUES (?)", (name,))
        self.cursor.execute(f"SELECT {id_column} FROM {table} WHERE name = ?", (name,))
        row = self.cursor.fetchone()
        return row[0] if row else None

    def _default_period_id(self):
        """Return the id of period 1, or of any period; seed the table if empty."""
        for attempt in range(2):
            self.cursor.execute("SELECT period_id FROM periods WHERE period_number = 1 LIMIT 1")
            row = self.cursor.fetchone()
            if row is None:
                self.cursor.execute("SELECT period_id FROM periods LIMIT 1")
                row = self.cursor.fetchone()
            if row is not None:
                return row[0]
            if attempt == 0:
                # No periods at all (import started without the seeding
                # step): create the standard ones rather than crashing.
                self.populate_periods_table()
        return None

    def _process_schedule_entry_with_teacher_mapping(self, student_id, group_info, subject_info, teacher_name):
        """Insert one schedule entry, creating the referenced lookup rows.

        Args:
            student_id: Id of an existing ``students`` row.
            group_info: Raw group cell; empty, "nan" and "-" mean "no entry".
            subject_info: Subject name; blank becomes "General Class".
            teacher_name: Teacher to link; ``None`` falls back to a
                placeholder teacher.

        The entry is skipped when the same student/subject/teacher/group
        combination is already stored, so repeated imports do not pile up
        duplicates.
        """
        subject_name = subject_info.strip() or "General Class"
        group_assignment = group_info.strip()

        # Only proceed if we have valid data
        if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
            return

        subject_id = self._get_or_create_lookup_id('subjects', subject_name)

        teacher_id = self._get_or_create_lookup_id('teachers', teacher_name)
        if teacher_id is None:
            # Fallback to a default teacher if the extracted name is unusable
            teacher_id = self._get_or_create_lookup_id('teachers', "Неизвестный преподаватель")

        # Clean the group name to separate it from student data
        group_id = self._get_or_create_lookup_id(
            'groups', self._clean_group_name(group_assignment))

        self.cursor.execute(
            "SELECT 1 FROM schedule WHERE student_id = ? AND subject_id = ? "
            "AND teacher_id = ? AND group_id = ? LIMIT 1",
            (student_id, subject_id, teacher_id, group_id),
        )
        if self.cursor.fetchone() is not None:
            return

        # Placeholder until day/period are available in the CSV: a random
        # weekday and the first period.
        day_id = self._get_or_create_lookup_id('days', random.choice(self._WEEKDAYS))
        period_id = self._default_period_id()

        self.cursor.execute(
            """
            INSERT INTO schedule (student_id, subject_id, teacher_id, day_id, period_id, group_id)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (student_id, subject_id, teacher_id, day_id, period_id, group_id),
        )

    def _clean_group_name(self, raw_group_data):
        """Extract clean group name from potentially mixed student/group data.

        Values that look like a class designation, contain a group keyword
        (or a hyphen), or mention a known subject are returned stripped but
        otherwise unchanged.  Anything else becomes ``Group_<n>`` where
        ``n`` is a stable checksum of the text.
        """
        cleaned = raw_group_data.strip()

        # Class designation such as "6А-1"
        if self._CLASS_PREFIX_RE.match(cleaned):
            return cleaned

        group_indicators = ('кл', 'class', 'club', 'track', 'group', 'module', '-')
        lowered = cleaned.lower()
        if any(indicator in lowered for indicator in group_indicators):
            return cleaned

        subject_indicators = ('ICT', 'English', 'Math', 'Physics', 'Chemistry', 'Biology', 'Science')
        if any(indicator in cleaned for indicator in subject_indicators):
            return cleaned

        # The built-in hash() of a str is salted per process, which would
        # give the same group a different name on every run; CRC32 is stable.
        return f"Group_{zlib.crc32(cleaned.encode('utf-8')) % 10000}"

    def _is_likely_teacher_name(self, text):
        """Check if the text is likely to be a teacher name.

        Texts shorter than 5 characters, known filler values and anything
        matching an exclusion pattern are rejected.  Otherwise the text is
        accepted if it matches a known name layout, or if it has at least two
        words and every word longer than one character is capitalised.
        """
        if not text or len(text.strip()) < 5:  # Require minimum length for a name
            return False

        text = text.strip()

        if text.lower() in self._COMMON_NON_NAMES:
            return False

        if any(pattern.match(text) for pattern in self._TEACHER_EXCLUSIONS):
            return False

        if any(pattern.match(text) for pattern in self._TEACHER_PATTERNS):
            return True

        name_parts = text.split()
        if len(name_parts) >= 2:
            # Require at least one real word: all() over an empty sequence is
            # True, which used to accept strings like "a b c".
            words = [part for part in name_parts if len(part) > 1]
            if words and all(word[0].isupper() for word in words):
                return True

        return False

    def _is_likely_subject_label(self, text):
        """Check if text is likely a subject label like 'Матем.', 'Информ.', 'Англ.яз', etc."""
        if not text or len(text) < 2:
            return False

        text_clean = text.strip().lower()
        return any(fragment in text_clean for fragment in self._SUBJECT_FRAGMENTS)

    def _find_matching_subject_in_header_from_list(self, subject_label, header_subjects, header_row):
        """Find the matching full subject name in the header based on the label.

        Args:
            subject_label: Short label such as "Матем." or "Англ.яз".
            header_subjects: Either a ``{column index: subject}`` dict or an
                iterable of ``(column index, subject)`` pairs.
            header_row: The raw header cells, used as a second source.

        Returns:
            The matching subject name, or ``None`` if nothing matches.
        """
        if not subject_label:
            return None

        subject_label_lower = subject_label.lower().replace('.', '').replace('яз', 'язык')
        if not subject_label_lower:
            return None

        # Accept the dict produced while parsing the header as well as a list
        # of pairs; iterating a dict directly yields bare keys.
        if isinstance(header_subjects, dict):
            pairs = list(header_subjects.items())
        else:
            pairs = list(header_subjects or [])

        # Direct match first
        for _col_idx, full_subj in pairs:
            full_lower = str(full_subj).lower()
            if full_lower and (subject_label_lower in full_lower or full_lower in subject_label_lower):
                return full_subj

        # Then partial matching in the whole header row.  Blank cells are
        # skipped: an empty string is a substring of everything.
        for header_item in header_row:
            item_lower = str(header_item).strip().lower()
            if item_lower and (subject_label_lower in item_lower or item_lower in subject_label_lower):
                return str(header_item).strip()

        # Finally match on common subject stems
        for _col_idx, full_subj in pairs:
            full_lower = str(full_subj).lower()
            if any(stem in subject_label_lower and stem in full_lower
                   for stem in self._SUBJECT_STEMS):
                return full_subj

        return None

    def find_student(self, name_query):
        """Search for students by name.

        Returns up to 10 ``(full_name, class_name)`` tuples whose name
        contains *name_query*.
        """
        self.cursor.execute("""
            SELECT s.full_name, s.class_name
            FROM students s
            WHERE s.full_name LIKE ?
            LIMIT 10
        """, (f'%{name_query}%',))

        return self.cursor.fetchall()

    def get_current_class(self, student_name, current_day, current_time):
        """Find student's current class.

        Args:
            student_name: Exact ``students.full_name``.
            current_day: Day name as stored in ``days.name``.
            current_time: Time text compared as a string against the period
                bounds (stored as ``HH:MM``).

        Returns:
            ``(subject, teacher, start_time, end_time)`` or ``None``.
        """
        self.cursor.execute("""
            SELECT sub.name, t.name, p.start_time, p.end_time
            FROM schedule sch
            JOIN students s ON sch.student_id = s.student_id
            JOIN subjects sub ON sch.subject_id = sub.subject_id
            JOIN teachers t ON sch.teacher_id = t.teacher_id
            JOIN days d ON sch.day_id = d.day_id
            JOIN periods p ON sch.period_id = p.period_id
            JOIN groups g ON sch.group_id = g.group_id
            WHERE s.full_name = ?
              AND d.name = ?
              AND p.start_time <= ?
              AND p.end_time >= ?
        """, (student_name, current_day, current_time, current_time))

        return self.cursor.fetchone()

    def close(self):
        """Close database connection"""
        self.conn.close()


# Main execution - just setup database
if __name__ == "__main__":
    db = SchoolScheduleDB()
    try:
        # Passing ``--auto`` as the first argument skips the interactive prompts.
        auto_update = len(sys.argv) > 1 and sys.argv[1] == '--auto'
        db.update_database_from_csv(auto_update=auto_update)
    finally:
        # Release the SQLite connection even if the import fails part-way.
        db.close()