#!/usr/bin/env python
"""
database_fresh.py - School schedule database (normalized version)

Creates normalized tables and extracts data from CSV files with proper relationships.
"""

import csv
import os
import random
import re
import sqlite3
import sys
import zlib


class SchoolScheduleDB:
    """SQLite-backed, normalized store for a school schedule.

    On construction the instance opens a connection to ``db_name`` and makes
    sure the tables ``teachers``, ``subjects``, ``days``, ``periods``,
    ``groups``, ``students`` and ``schedule`` exist.  Student distribution
    CSV files can then be imported with :meth:`update_database_from_csv` or
    :meth:`process_csv_with_teacher_mapping`, and queried with
    :meth:`find_student` and :meth:`get_current_class`.
    """

    # DDL executed by create_tables(); every statement is idempotent.
    _SCHEMA = (
        """
            CREATE TABLE IF NOT EXISTS teachers (
                teacher_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                email TEXT,
                phone TEXT
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS subjects (
                subject_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS days (
                day_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL  -- e.g., Monday, Tuesday, etc.
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS periods (
                period_id INTEGER PRIMARY KEY AUTOINCREMENT,
                period_number INTEGER,
                start_time TEXT,
                end_time TEXT,
                UNIQUE(period_number, start_time, end_time)
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS groups (
                group_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                class_name TEXT
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS students (
                student_id INTEGER PRIMARY KEY AUTOINCREMENT,
                class_name TEXT,
                full_name TEXT NOT NULL
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS schedule (
                entry_id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_id INTEGER,
                subject_id INTEGER,
                teacher_id INTEGER,
                day_id INTEGER,
                period_id INTEGER,
                group_id INTEGER,
                FOREIGN KEY (student_id) REFERENCES students(student_id),
                FOREIGN KEY (subject_id) REFERENCES subjects(subject_id),
                FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id),
                FOREIGN KEY (day_id) REFERENCES days(day_id),
                FOREIGN KEY (period_id) REFERENCES periods(period_id),
                FOREIGN KEY (group_id) REFERENCES groups(group_id)
            )
        """,
    )

    # (period_number, start_time, end_time) rows seeded into ``periods``.
    _PERIOD_TIMES = (
        (1, '09:00', '09:40'),
        (2, '10:00', '10:40'),
        (3, '11:00', '11:40'),
        (4, '11:50', '12:30'),
        (5, '12:40', '13:20'),
        (6, '13:30', '14:10'),
        (7, '14:20', '15:00'),
        (8, '15:20', '16:00'),
        (9, '16:15', '16:55'),
        (10, '17:05', '17:45'),
        (11, '17:55', '18:35'),
        (12, '18:45', '19:20'),
        (13, '19:20', '20:00'),
    )
    _DAYS_OF_WEEK = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    # Days a placeholder schedule entry may be assigned to.
    _SCHOOL_DAYS = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

    # Lower-case fragments used to recognise header cells.
    _FIO_MARKERS = ('фио', 'ф.и.о')
    _CLASS_MARKERS = ('класс', 'class')
    _NAME_MARKERS = ('имя', 'name', 'фамилия', 'surname')
    # Header cells equal to one of these (lower-cased) are never subjects.
    _NON_SUBJECT_HEADERS = frozenset(
        ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера']
    )
    # Header cells containing one of these (lower-cased) are never subjects.
    _NON_SUBJECT_FRAGMENTS = ('ф.и.о', 'сортировка', 'номер')
    # Student-row columns whose header contains one of these carry no schedule data.
    _NON_SCHEDULE_MARKERS = ('сортировка', 'номер', 'шкафчика', 'локера')
    # Words in the cell left of a teacher name that mark it as a department/teacher label.
    _TEACHER_CONTEXT_KEYWORDS = ('учитель', 'teacher', 'кафедра', 'department')

    # A class designation such as 6А, 11А or 4B.
    _CLASS_NAME_RE = re.compile(r'^\d+[А-ЯA-Z]$')

    # Values that show up in teacher rows but are not names (compared lower-cased).
    _NON_NAME_VALUES = frozenset(
        ['-', 'nan', 'нет', 'нету', 'отсутствует', 'учитель', 'teacher', '',
         'е4 е5', 'e4 e5', 'группа', 'group']
    )
    # Anything matching one of these (case-insensitively, anchored at the start) is not a name.
    _NAME_EXCLUSION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'^[А-ЯЁ]\d+\s+[А-ЯЁ]\d+$',   # E4 E5 pattern (Cyrillic letters)
            r'^[A-Z]\d+\s+[A-Z]\d+$',      # English groups
            r'.*[Tt]rack.*',               # Track identifiers
            r'.*[Gg]roup.*',               # Group identifiers
            r'.*\d+[А-ЯA-Z]\d*$',          # Number-letter combos
            r'^[А-ЯЁA-Z].*\d+',            # Starts with a letter and contains digits
            r'.*[Cc]lub.*',                # Club identifiers
        )
    )
    # Shapes that are accepted as a teacher name outright (case-sensitive).
    _TEACHER_NAME_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ]\.\s*[А-ЯЁ]\.$',               # Иванов А.А.
            r'^[А-ЯЁ]\.\s*[А-ЯЁ]\.\s+[А-ЯЁ][а-яё]+$',               # А.А. Иванов
            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$',     # Full name
            r'^[A-Z][a-z]+\s+[A-Z][a-z]+$',                         # John Smith
            r'^[A-Z][a-z]+\s+[A-Z]\.\s*[A-Z]\.$',                   # Smith J.J.
            r'^[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+$',                     # Russian name without patronymic
        )
    )

    # Lower-case fragments that mark a cell as a subject label.  Entries of the
    # original lists that contained a shorter entry as a substring were dropped
    # because a substring test makes them redundant.
    _SUBJECT_KEYWORDS = (
        'матем.', 'информ.', 'англ.яз', 'русск.яз', 'каб.', 'физика', 'химия',
        'биол', 'история', 'общество', 'география', 'литер', 'физкульт',
        'технотрек', 'лидерство', 'спорт. клуб', 'орксэ', 'китайск', 'немецк',
        'француз', 'speaking', 'math', 'ict', 'geography', 'physics',
        'robotics', 'culinary', 'science', 'ai core', 'vr/ar', 'cybersafety',
        'business', 'design', 'prototype', 'mediacom', 'algebra', 'geometry',
        'calculus', 'statistics', 'coding', 'programming', 'algorithm',
        'logic', 'physical education', 'pe', 'sports', 'swimming', 'fitness',
        'gymnastics', 'climbing', 'games', 'art', 'music', 'dance', 'karate',
        'judo', 'chess', 'leadership', 'entrepreneurship',
    )
    # Word stems used as a last resort to pair an abbreviated label with a header subject.
    _SUBJECT_STEMS = ('матем', 'информ', 'англ', 'русск', 'физик', 'хим',
                      'биол', 'истор', 'общ', 'географ')

    def __init__(self, db_name='school_schedule.db'):
        """Open (or create) the SQLite database ``db_name`` and ensure the schema exists."""
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        # Initialize database tables
        self.create_tables()

    def create_tables(self):
        """Create the normalized tables (if missing) and commit."""
        for ddl in self._SCHEMA:
            self.cursor.execute(ddl)
        self.conn.commit()

    def populate_periods_table(self):
        """Seed the ``periods`` and ``days`` tables with the standard values and commit.

        Both inserts use ``INSERT OR IGNORE`` against UNIQUE constraints, so
        calling this repeatedly does not create duplicates.
        """
        self.cursor.executemany(
            "INSERT OR IGNORE INTO periods (period_number, start_time, end_time) VALUES (?, ?, ?)",
            self._PERIOD_TIMES,
        )
        self.cursor.executemany(
            "INSERT OR IGNORE INTO days (name) VALUES (?)",
            [(day,) for day in self._DAYS_OF_WEEK],
        )
        self.conn.commit()

    def update_database_from_csv(self, auto_update=True):
        """Import student data CSV files from the ``sample_data`` directory.

        Files whose name contains ``first_sheet``, ``last_sheet`` or
        ``template`` are ignored.  With ``auto_update`` true every remaining
        file is imported; otherwise the user is asked on stdin whether to
        import and which files to pick.  Progress is reported with ``print``.
        """
        sample_data_dir = "sample_data"

        if not os.path.exists(sample_data_dir):
            print(f"Directory '{sample_data_dir}' not found.")
            return

        # Keep only the actual student distribution files (not the sheets).
        # Sorted so that the numbered menu below is stable between runs;
        # os.listdir() returns entries in arbitrary order.
        excluded_markers = ('first_sheet', 'last_sheet', 'template')
        csv_files = sorted(
            filename
            for filename in os.listdir(sample_data_dir)
            if filename.endswith('.csv')
            and not any(marker in filename for marker in excluded_markers)
        )

        if not csv_files:
            print(f"No student data CSV files found in '{sample_data_dir}' directory.")
            return

        print(f"Found {len(csv_files)} student data CSV file(s):")
        for number, filename in enumerate(csv_files, 1):
            print(f"  {number}. {filename}")

        if auto_update:
            print("\nAuto-updating database with all student data CSV files...")
            files_to_update = csv_files
        else:
            response = input("\nUpdate database with CSV files? (yes/no): ").lower()
            if response not in ['yes', 'y', 'да']:
                print("Skipping database update.")
                return

            print("\n0. Update all files")

            try:
                selection = input("\nSelect file(s) to update (0 for all, or comma-separated numbers like 1,2,3): ")
                if selection.strip() == '0':
                    files_to_update = csv_files
                else:
                    # Menu numbers are 1-based; out-of-range numbers are dropped.
                    indices = [int(token.strip()) - 1 for token in selection.split(',')]
                    files_to_update = [csv_files[i] for i in indices if 0 <= i < len(csv_files)]
                    if not files_to_update:
                        print("No valid selections made.")
                        return
            except ValueError:
                print("Invalid input. Please enter numbers separated by commas or '0' for all files.")
                return

        # Populate the periods and days tables first
        self.populate_periods_table()

        print(f"\nUpdating database with {len(files_to_update)} file(s):")
        for filename in files_to_update:
            print(f"  - {filename}")
            csv_path = os.path.join(sample_data_dir, filename)
            print(f"Processing {csv_path}...")
            self.process_csv_with_teacher_mapping(csv_path)

        print("Database updated successfully with selected CSV data.")

    def process_csv_with_teacher_mapping(self, csv_file):
        """Import one student distribution CSV file.

        The header row is located, teacher names found in the rows above it
        are paired with subject columns, and every valid student row below it
        yields one student plus one schedule entry per non-empty subject
        cell.  Importing the same file again does not duplicate students or
        schedule entries.

        Returns:
            ``True`` when the file was processed and committed, ``False``
            when it does not exist, no header row was found, or the header
            has no recognisable class/name columns.
        """
        if not os.path.exists(csv_file):
            return False

        # newline='' is what the csv module expects (keeps newlines embedded in
        # quoted cells intact); utf-8-sig also accepts files that start with a BOM.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
            rows = list(csv.reader(file))

        header_idx = self._find_header_row(rows)
        if header_idx is None:
            print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
            return False

        header_row = rows[header_idx]
        header_subjects = self._header_subject_columns(header_row)
        teacher_subject_map = self._build_teacher_subject_map(rows, header_idx, header_subjects)

        # The column layout depends only on the header, so resolve it once.
        class_col_idx = self._find_class_column(header_row)
        name_col_idx = self._find_name_column(header_row)
        if class_col_idx is None or name_col_idx is None:
            print(f"Skipping {csv_file} - could not locate class/name columns in the header row")
            return False

        # Columns 0-2 and the class/name columns hold metadata, not schedule data.
        metadata_cols = {0, 1, 2, class_col_idx, name_col_idx}
        schedule_columns = []
        for col_idx, header_cell in enumerate(header_row):
            header_text = str(header_cell)
            if col_idx in metadata_cols:
                continue
            if self._mentions(header_text, self._NON_SCHEDULE_MARKERS):
                continue
            subject_name = header_text.strip()
            teacher_name = teacher_subject_map.get(subject_name, f"Default Teacher for {subject_name}")
            schedule_columns.append((col_idx, subject_name, teacher_name))

        for student_row in rows[header_idx + 1:]:
            if not self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx):
                continue

            name = student_row[name_col_idx].strip()
            class_name = student_row[class_col_idx].strip()
            student_id = self._get_or_create_student(class_name, name)

            for col_idx, subject_name, teacher_name in schedule_columns:
                if col_idx >= len(student_row):
                    break  # schedule_columns is ordered by column index
                group_assignment = student_row[col_idx].strip()
                if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
                    continue
                self._process_schedule_entry_with_teacher_mapping(
                    student_id, group_assignment, subject_name, teacher_name
                )

        self.conn.commit()
        return True

    @staticmethod
    def _mentions(cell, markers):
        """Return True if the lower-cased text of ``cell`` contains any of ``markers``."""
        lowered = str(cell).lower()
        return any(marker in lowered for marker in markers)

    def _find_header_row(self, rows):
        """Return the index of the header row in ``rows`` or ``None``.

        The first row with a cell mentioning ФИО / Ф.И.О. wins.  Failing that,
        and only if class and name indicators both occur within the first ten
        rows, the first row that carries both kinds of indicator is used.
        """
        for idx, row in enumerate(rows):
            if any(self._mentions(cell, self._FIO_MARKERS) for cell in row):
                return idx

        preview = rows[:10]
        has_class = any(self._mentions(cell, self._CLASS_MARKERS) for row in preview for cell in row)
        has_name = any(self._mentions(cell, self._NAME_MARKERS) for row in preview for cell in row)
        if not (has_class and has_name):
            return None

        for idx, row in enumerate(rows):
            if (any(self._mentions(cell, self._CLASS_MARKERS) for cell in row)
                    and any(self._mentions(cell, self._NAME_MARKERS) for cell in row)):
                return idx
        return None

    def _header_subject_columns(self, header_row):
        """Map column index -> subject name for every header cell that names a subject."""
        subjects = {}
        for col_idx, cell in enumerate(header_row):
            subject_name = str(cell).strip()
            lowered = subject_name.lower()
            if not subject_name:
                continue
            if lowered in self._NON_SUBJECT_HEADERS:
                continue
            if any(fragment in lowered for fragment in self._NON_SUBJECT_FRAGMENTS):
                continue
            if "№" in subject_name:
                continue
            subjects[col_idx] = subject_name
        return subjects

    def _find_class_column(self, header_row):
        """Return the index of the first header cell mentioning a class column, or ``None``."""
        for idx, cell in enumerate(header_row):
            if self._mentions(cell, self._CLASS_MARKERS):
                return idx
        return None

    def _find_name_column(self, header_row):
        """Return the index of the student-name column, or ``None``.

        ФИО / Ф.И.О. / "name" headers are preferred.  The second pass accepts
        the other spellings that header detection recognises (e.g. "Фио",
        "Имя", "Фамилия") so that a detected header is not silently unusable.
        """
        for idx, cell in enumerate(header_row):
            text = str(cell)
            lowered = text.lower()
            if "ФИО" in text or "ф.и.о." in lowered or "name" in lowered:
                return idx
        for idx, cell in enumerate(header_row):
            if self._mentions(cell, self._FIO_MARKERS + ('имя', 'фамилия')):
                return idx
        return None

    def _subject_for_teacher_cell(self, row, col, header_subjects):
        """Pick the header subject that a teacher name at ``row[col]`` belongs to.

        The subject column to the right is used when the cell to the right is
        non-empty, or when the cell to the left is a teacher/department label;
        otherwise the subject column at the teacher's own position is used.
        Returns ``None`` when neither position is a subject column.
        """
        left_context = str(row[col - 1]).strip() if col > 0 else ""
        right_context = str(row[col + 1]).strip() if col < len(row) - 1 else ""

        right_subject = header_subjects.get(col + 1)
        if right_subject is not None:
            if right_context or self._mentions(left_context, self._TEACHER_CONTEXT_KEYWORDS):
                return right_subject
        return header_subjects.get(col)

    def _build_teacher_subject_map(self, rows, header_idx, header_subjects):
        """Map subject name -> teacher name from the rows above the header.

        The first teacher found for a subject is kept.  A cell that is not a
        name as a whole but holds several newline-separated entries
        contributes its first entry that looks like a name.
        """
        mapping = {}

        # Up to 15 leading rows above the header: teacher names with context.
        for row in rows[:min(15, header_idx)]:
            for col, cell in enumerate(row):
                cell_str = str(cell).strip()
                if self._is_likely_teacher_name(cell_str):
                    teacher = cell_str
                elif '\n' in cell_str or '\\n' in cell_str:
                    parts = (part.strip() for part in cell_str.replace('\\n', '\n').split('\n'))
                    teacher = next(
                        (part for part in parts if part and self._is_likely_teacher_name(part)),
                        None,
                    )
                    if teacher is None:
                        continue
                else:
                    continue

                subject = self._subject_for_teacher_cell(row, col, header_subjects)
                if subject is not None:
                    mapping.setdefault(subject, teacher)

        # The 5 rows right above the header: a name sitting directly above a
        # subject column fills any subject that is still unassigned.
        for row in rows[max(0, header_idx - 5):header_idx]:
            for col, cell in enumerate(row):
                cell_str = str(cell).strip()
                if col in header_subjects and self._is_likely_teacher_name(cell_str):
                    mapping.setdefault(header_subjects[col], cell_str)

        return mapping

    def _get_or_create_student(self, class_name, full_name):
        """Return the id of the student with this class and name, inserting it if missing.

        ``students`` has no UNIQUE constraint, so ``INSERT OR IGNORE`` would
        add a duplicate row on every import; the lookup comes first instead.
        """
        self.cursor.execute(
            "SELECT student_id FROM students WHERE full_name = ? AND class_name = ?",
            (full_name, class_name),
        )
        found = self.cursor.fetchone()
        if found is not None:
            return found[0]
        self.cursor.execute(
            "INSERT INTO students (class_name, full_name) VALUES (?, ?)",
            (class_name, full_name),
        )
        return self.cursor.lastrowid

    def _ensure_named_row(self, table, id_column, name):
        """Insert ``name`` into ``table`` if missing and return its id (``None`` if rejected).

        ``table`` and ``id_column`` are only ever passed as literals from
        within this class; ``name`` is always bound as a parameter.
        """
        self.cursor.execute(f"INSERT OR IGNORE INTO {table} (name) VALUES (?)", (name,))
        self.cursor.execute(f"SELECT {id_column} FROM {table} WHERE name = ?", (name,))
        found = self.cursor.fetchone()
        return found[0] if found is not None else None

    def _is_valid_student_record_by_cols(self, row, class_col_idx, name_col_idx):
        """Check if a row represents a valid student record based on specific columns.

        A row is valid when both the class cell and the name cell are
        non-empty after stripping, they differ from each other, and the name
        cell does not itself look like a class designation (e.g. ``6А``).
        """
        if len(row) <= max(class_col_idx, name_col_idx):
            return False

        class_name = row[class_col_idx].strip()
        student_name = row[name_col_idx].strip()

        if self._CLASS_NAME_RE.match(class_name):
            return bool(student_name and student_name != class_name)

        # A class designation in the name column means this is not a student row.
        if self._CLASS_NAME_RE.match(student_name):
            return False

        return bool(class_name and student_name and class_name != student_name)

    def _process_schedule_entry_with_teacher_mapping(self, student_id, group_info, subject_info, teacher_name):
        """Store one schedule entry, creating subject/teacher/group rows as needed.

        Nothing is stored when ``group_info`` is blank, ``nan`` or ``-``, or
        when the student already has an entry for the same subject, teacher
        and group.  The day is a placeholder chosen at random from Monday to
        Friday and the period is period 1, because the CSV does not carry
        either.  The caller is responsible for committing.
        """
        subject_name = subject_info.strip() or "General Class"
        group_assignment = group_info.strip()
        if not group_assignment or group_assignment.lower() == "nan" or group_assignment == "-":
            return

        subject_id = self._ensure_named_row("subjects", "subject_id", subject_name)

        teacher_id = self._ensure_named_row("teachers", "teacher_id", teacher_name)
        if teacher_id is None:
            # The name was rejected by the table (e.g. NULL); use the shared placeholder.
            teacher_id = self._ensure_named_row("teachers", "teacher_id", "Неизвестный преподаватель")

        # Clean the group name to separate it from student data
        group_name = self._clean_group_name(group_assignment)
        group_id = self._ensure_named_row("groups", "group_id", group_name)

        # ``schedule`` has no UNIQUE constraint, so duplicates must be detected
        # here; otherwise every re-import adds the entry again on a new random day.
        self.cursor.execute(
            "SELECT 1 FROM schedule WHERE student_id = ? AND subject_id = ? "
            "AND teacher_id = ? AND group_id = ? LIMIT 1",
            (student_id, subject_id, teacher_id, group_id),
        )
        if self.cursor.fetchone() is not None:
            return

        selected_day = random.choice(self._SCHOOL_DAYS)
        day_id = self._ensure_named_row("days", "day_id", selected_day)

        period_id = self._default_period_id()

        self.cursor.execute("""
            INSERT OR IGNORE INTO schedule (student_id, subject_id, teacher_id, day_id, period_id, group_id)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (student_id, subject_id, teacher_id, day_id, period_id, group_id))

    def _default_period_id(self):
        """Return the id of period 1, of any period if there is no period 1, seeding the table if empty."""
        select_first_period = "SELECT period_id FROM periods WHERE period_number = 1 LIMIT 1"

        self.cursor.execute(select_first_period)
        found = self.cursor.fetchone()
        if found is None:
            self.cursor.execute("SELECT period_id FROM periods LIMIT 1")
            found = self.cursor.fetchone()
        if found is None:
            # Reached when a CSV is imported without update_database_from_csv(),
            # which is the only other place that seeds the periods.
            self.populate_periods_table()
            self.cursor.execute(select_first_period)
            found = self.cursor.fetchone()
        return found[0]

    def _clean_group_name(self, raw_group_data):
        """Return the group name to store for a raw cell value.

        Values that start like a class designation, contain a group indicator
        or contain a known subject name are kept as they are (stripped).
        Anything else is replaced by ``Group_<n>`` where ``n`` is derived from
        a CRC-32 of the text, so the same text always yields the same name.
        """
        cleaned = raw_group_data.strip()

        # Starts like a class designation (e.g. "6А ...").
        if re.match(r'^\d+[А-ЯA-Z]', cleaned):
            return cleaned

        group_indicators = ('кл', 'class', 'club', 'track', 'group', 'module', '-')
        lowered = cleaned.lower()
        if any(indicator in lowered for indicator in group_indicators):
            return cleaned

        subject_indicators = ('ICT', 'English', 'Math', 'Physics', 'Chemistry', 'Biology', 'Science')
        if any(indicator in cleaned for indicator in subject_indicators):
            return cleaned

        # The built-in hash() of a str is salted per process, which made these
        # names change from run to run; CRC-32 is stable.
        return f"Group_{zlib.crc32(cleaned.encode('utf-8')) % 10000}"

    def _is_likely_teacher_name(self, text):
        """Check if the text is likely to be a teacher name.

        Text shorter than five characters, known filler values and anything
        matching an exclusion pattern (group/track/club identifiers, text with
        digits) is rejected.  Text matching one of the name shapes is
        accepted, as is any text of two or more words whose multi-character
        words all start with an upper-case letter.
        """
        if not text or len(text.strip()) < 5:  # Require minimum length for a name
            return False

        text = text.strip()

        if text.lower() in self._NON_NAME_VALUES:
            return False
        if any(pattern.match(text) for pattern in self._NAME_EXCLUSION_PATTERNS):
            return False
        if any(pattern.match(text) for pattern in self._TEACHER_NAME_PATTERNS):
            return True

        name_parts = text.split()
        if len(name_parts) < 2:
            return False
        # Single-character parts (e.g. a bare initial) are not checked, but at
        # least one real word is required so that input such as "- - -" is
        # not accepted by an all() over nothing.
        words = [part for part in name_parts if len(part) > 1]
        return bool(words) and all(word[0].isupper() for word in words)

    def _is_likely_subject_label(self, text):
        """Check if text is likely a subject label like 'Матем.', 'Информ.', 'Англ.яз', etc.

        The check is a case-insensitive substring search for any known
        subject keyword.
        """
        if not text or len(text) < 2:
            return False

        text_clean = text.strip().lower()
        return any(keyword in text_clean for keyword in self._SUBJECT_KEYWORDS)

    def _find_matching_subject_in_header_from_list(self, subject_label, header_subjects, header_row):
        """Find the matching full subject name in the header based on the label.

        Args:
            subject_label: Possibly abbreviated subject label, e.g. ``'Матем.'``.
            header_subjects: Either a mapping of column index to subject name
                or an iterable of ``(column index, subject name)`` pairs.
            header_row: The raw header cells, searched when no subject matches.

        Returns:
            The matching subject name, or ``None`` when nothing matches.
            Blank labels and blank header cells never match.
        """
        if not subject_label:
            return None

        subject_label_lower = subject_label.lower().replace('.', '').replace('яз', 'язык')
        if not subject_label_lower.strip():
            return None

        if isinstance(header_subjects, dict):
            pairs = header_subjects.items()
        else:
            pairs = header_subjects
        subjects = [str(full_subj) for _, full_subj in pairs if str(full_subj).strip()]

        # Direct match first
        for full_subj in subjects:
            full_lower = full_subj.lower()
            if subject_label_lower in full_lower or full_lower in subject_label_lower:
                return full_subj

        # If no direct match, try partial matching against the whole header row
        for header_item in header_row:
            item = str(header_item).strip()
            if not item:
                continue  # "" is a substring of everything
            item_lower = item.lower()
            if subject_label_lower in item_lower or item_lower in subject_label_lower:
                return item

        # Finally pair label and subject through a shared word stem
        for full_subj in subjects:
            full_lower = full_subj.lower()
            if any(stem in subject_label_lower and stem in full_lower for stem in self._SUBJECT_STEMS):
                return full_subj

        return None

    def find_student(self, name_query):
        """Return up to 10 ``(full_name, class_name)`` rows whose name contains ``name_query``."""
        self.cursor.execute("""
            SELECT s.full_name, s.class_name
            FROM students s
            WHERE s.full_name LIKE ?
            LIMIT 10
        """, (f'%{name_query}%',))

        return self.cursor.fetchall()

    def get_current_class(self, student_name, current_day, current_time):
        """Find the class a student is in at the given day and time.

        Args:
            student_name: Exact ``full_name`` of the student.
            current_day: Day name as stored in ``days`` (e.g. ``'Monday'``).
            current_time: Time text compared as a string against the stored
                ``HH:MM`` period boundaries (inclusive on both ends).

        Returns:
            ``(subject, teacher, start_time, end_time)`` for one matching
            schedule entry, or ``None`` when there is none.
        """
        self.cursor.execute("""
            SELECT sub.name, t.name, p.start_time, p.end_time
            FROM schedule sch
            JOIN students s ON sch.student_id = s.student_id
            JOIN subjects sub ON sch.subject_id = sub.subject_id
            JOIN teachers t ON sch.teacher_id = t.teacher_id
            JOIN days d ON sch.day_id = d.day_id
            JOIN periods p ON sch.period_id = p.period_id
            JOIN groups g ON sch.group_id = g.group_id
            WHERE s.full_name = ?
            AND d.name = ?
            AND p.start_time <= ?
            AND p.end_time >= ?
        """, (student_name, current_day, current_time, current_time))

        return self.cursor.fetchone()

    def close(self):
        """Close database connection"""
        self.conn.close()


# Main execution - just set up the database.
# Passing --auto as the first argument imports every CSV file without prompting.
if __name__ == "__main__":
    db = SchoolScheduleDB()
    try:
        # Check if auto-update flag is passed as argument
        auto_update = len(sys.argv) > 1 and sys.argv[1] == '--auto'
        db.update_database_from_csv(auto_update=auto_update)
    finally:
        # Close the connection even when the import raises.
        db.close()
# Web-page footer captured with this file (not part of the program):
# "Reference in New Issue", "Block a user".