Files
ai7-m3/scheduler_bots/database_fixed.py
2026-02-05 10:15:09 +03:00

221 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
self.conn.commit()
return True
def process_csv_with_teacher_mapping(self, csv_file):
"""Process CSV with teacher-subject mapping based on positional order"""
if not os.path.exists(csv_file):
return False
with open(csv_file, 'r', encoding='utf-8') as file:
reader = csv.reader(file)
rows = list(reader)
# Identify header row - look for the row containing "ФИО" (full name) or similar indicators
header_idx = None
for i, row in enumerate(rows):
for cell in row:
if "ФИО" in str(cell) or "фио" in str(cell).lower() or "Ф.И.О." in str(cell) or "ф.и.о." in str(cell):
header_idx = i
break
if header_idx is not None:
break
if header_idx is None:
# Check if this file contains class and name columns that identify it as a student data file
# Even if the header doesn't contain ФИО, we might still be able to identify student data
has_class_indicators = any(
any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class'])
for row in rows[:min(len(rows), 10)] # Check first 10 rows
)
has_name_indicators = any(
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname'])
for row in rows[:min(len(rows), 10)] # Check first 10 rows
)
if has_class_indicators and has_name_indicators:
# Try to find the header row by looking for class and name indicators
for i, row in enumerate(rows):
if any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) and \
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']):
header_idx = i
break
if header_idx is None:
print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
return False
# Find teacher-subject mappings in the first 0-15 rows before the header
teacher_subject_map = {}
# Build a mapping of subject names in the header row
header_row = rows[header_idx]
header_subjects = {}
for col_idx, subject_name in enumerate(header_row):
subject_name = str(subject_name).strip()
if (subject_name and
subject_name.lower() not in ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера'] and
subject_name.strip() != "" and
"ф.и.о" not in subject_name.lower() and
"сортировка" not in subject_name.lower() and
"номер" not in subject_name.lower() and
"" not in subject_name):
header_subjects[col_idx] = subject_name # Map column index to subject name
# Process rows before the header to find teacher names and map them to subjects
for i in range(min(15, header_idx)): # Check first 15 rows before header
current_row = rows[i]
# Process all cells in the row to find teacher names and their adjacent context
for j, cell_value in enumerate(current_row):
cell_str = str(cell_value).strip()
# Check if this cell is a likely teacher name
if self._is_likely_teacher_name(cell_str):
# Look for context on the left (department) and right (subject)
left_context = ""
right_context = ""
# Get left neighbor (department)
if j > 0 and j-1 < len(current_row):
left_context = str(current_row[j-1]).strip()
# Get right neighbor (subject)
if j < len(current_row) - 1:
right_context = str(current_row[j+1]).strip()
# Try to determine the subject based on adjacency
matched_subject = None
# First priority: right neighbor if it matches a subject in the header
if right_context and j+1 in header_subjects:
matched_subject = header_subjects[j+1]
# Second priority: use left context if it semantically relates to a teacher
elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']):
# If left context indicates a department, look for subject to the right of teacher
if j+1 in header_subjects:
matched_subject = header_subjects[j+1]
# If no subject to the right, try to map by position
elif j in header_subjects:
matched_subject = header_subjects[j]
# Third priority: try to map by position
elif j in header_subjects:
matched_subject = header_subjects[j]
# Only add if we don't have a better teacher name for this subject yet
if matched_subject and (matched_subject not in teacher_subject_map or
'Default Teacher for' in teacher_subject_map.get(matched_subject, '')):
teacher_subject_map[matched_subject] = cell_str
# If the cell contains multiple names (separated by newlines), process each separately
elif '\n' in cell_str or '\\n' in cell_str:
cell_parts = [part.strip() for part in cell_str.replace('\\n', '\n').split('\n') if part.strip()]
for part in cell_parts:
if self._is_likely_teacher_name(part):
# Look for context on the left (department) and right (subject)
left_context = ""
right_context = ""
# Get left neighbor (department)
if j > 0 and j-1 < len(current_row):
left_context = str(current_row[j-1]).strip()
# Get right neighbor (subject)
if j < len(current_row) - 1:
right_context = str(current_row[j+1]).strip()
# Try to determine the subject based on adjacency
matched_subject = None
# First priority: right neighbor if it matches a subject in the header
if right_context and j+1 in header_subjects:
matched_subject = header_subjects[j+1]
# Second priority: use left context if it semantically relates to a teacher
elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']):
# If left context indicates a department, look for subject to the right of teacher
if j+1 in header_subjects:
matched_subject = header_subjects[j+1]
# If no subject to the right, try to map by position
elif j in header_subjects:
matched_subject = header_subjects[j]
# Third priority: try to map by position
elif j in header_subjects:
matched_subject = header_subjects[j]
# Only add if we don't have a better teacher name for this subject yet
if matched_subject and (matched_subject not in teacher_subject_map or
'Default Teacher for' in teacher_subject_map.get(matched_subject, '')):
teacher_subject_map[matched_subject] = part
# Additional validation: Remove any teacher-subject mappings that seem incorrect
validated_teacher_subject_map = {}
for subject, teacher in teacher_subject_map.items():
# Only add to validated map if teacher name passes all checks
if self._is_likely_teacher_name(teacher):
validated_teacher_subject_map[subject] = teacher
else:
print(f"Warning: Invalid teacher name '{teacher}' detected for subject '{subject}', skipping...")
teacher_subject_map = validated_teacher_subject_map
# Additional teacher-subject mapping: scan the data rows for teacher names paired with subjects
# In many CSV files, teacher names appear in the same rows as subject data
for i in range(header_idx + 1, min(len(rows), header_idx + 50)): # Check first 50 data rows
current_row = rows[i]
for j, cell_value in enumerate(current_row):
cell_str = str(cell_value).strip()
# If cell contains a likely teacher name and corresponds to a subject column
if self._is_likely_teacher_name(cell_str) and j in header_subjects:
subject_name = header_subjects[j]
# Only add if we don't have a better teacher name for this subject yet
if (subject_name not in teacher_subject_map or
'Default Teacher for' in teacher_subject_map.get(subject_name, '')):
teacher_subject_map[subject_name] = cell_str
# Process each student row
for student_row in rows[header_idx + 1:]:
# Determine the structure dynamically based on the header
class_col_idx = None
name_col_idx = None
# Find the index of the class column (usually called "Класс")
for idx, header in enumerate(header_row):
if "Класс" in str(header) or "класс" in str(header) or "Class" in str(header) or "class" in str(header):
class_col_idx = idx
break
# Find the index of the name column (usually called "ФИО")
for idx, header in enumerate(header_row):
if "ФИО" in str(header) or "ф.и.о." in str(header).lower() or "name" in str(header).lower():
name_col_idx = idx
break
# If we couldn't find the columns properly, skip this row
if class_col_idx is None or name_col_idx is None:
continue
# Check if this row has valid data in the expected columns
if (len(student_row) > max(class_col_idx, name_col_idx) and
student_row[class_col_idx].strip() and # class name exists
student_row[name_col_idx].strip() and # student name exists
self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)):
name = student_row[name_col_idx].strip() # Name column
class_name = student_row[class_col_idx].strip() # Class column
# Insert student into the database
self.cursor.execute(
"INSERT OR IGNORE INTO students (class_name, full_name) VALUES (?, ?)",
(class_name, name)
)
# Get the student_id for this student
self.cursor.execute("SELECT student_id FROM students WHERE full_name = ? AND class_name = ?", (name, class_name))
student_id_result = self.cursor.fetchone()
if student_id_result is None:
continue
student_id = student_id_result[0]
# Process schedule data for this student