self.conn.commit() return True def process_csv_with_teacher_mapping(self, csv_file): """Process CSV with teacher-subject mapping based on positional order""" if not os.path.exists(csv_file): return False with open(csv_file, 'r', encoding='utf-8') as file: reader = csv.reader(file) rows = list(reader) # Identify header row - look for the row containing "ФИО" (full name) or similar indicators header_idx = None for i, row in enumerate(rows): for cell in row: if "ФИО" in str(cell) or "фио" in str(cell).lower() or "Ф.И.О." in str(cell) or "ф.и.о." in str(cell): header_idx = i break if header_idx is not None: break if header_idx is None: # Check if this file contains class and name columns that identify it as a student data file # Even if the header doesn't contain ФИО, we might still be able to identify student data has_class_indicators = any( any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) for row in rows[:min(len(rows), 10)] # Check first 10 rows ) has_name_indicators = any( any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']) for row in rows[:min(len(rows), 10)] # Check first 10 rows ) if has_class_indicators and has_name_indicators: # Try to find the header row by looking for class and name indicators for i, row in enumerate(rows): if any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) and \ any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']): header_idx = i break if header_idx is None: print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns") return False # Find teacher-subject mappings in the first 0-15 rows before the header teacher_subject_map = {} # Build a mapping of subject names in the header row header_row = rows[header_idx] header_subjects = {} for col_idx, subject_name in enumerate(header_row): subject_name = str(subject_name).strip() if (subject_name and subject_name.lower() not in ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера'] and subject_name.strip() != "" and "ф.и.о" not in subject_name.lower() and "сортировка" not in subject_name.lower() and "номер" not in subject_name.lower() and "№" not in subject_name): header_subjects[col_idx] = subject_name # Map column index to subject name # Process rows before the header to find teacher names and map them to subjects for i in range(min(15, header_idx)): # Check first 15 rows before header current_row = rows[i] # Process all cells in the row to find teacher names and their adjacent context for j, cell_value in enumerate(current_row): cell_str = str(cell_value).strip() # Check if this cell is a likely teacher name if self._is_likely_teacher_name(cell_str): # Look for context on the left (department) and right (subject) left_context = "" right_context = "" # Get left neighbor (department) if j > 0 and j-1 < len(current_row): left_context = str(current_row[j-1]).strip() # Get right neighbor (subject) if j < len(current_row) - 1: right_context = str(current_row[j+1]).strip() # Try to determine the subject based on adjacency matched_subject = None # First priority: right neighbor if it matches a subject in the header if right_context and j+1 in header_subjects: matched_subject = header_subjects[j+1] # Second priority: use left context if it semantically relates to a teacher elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']): # If left context indicates a department, look for subject to the right of teacher if j+1 in header_subjects: matched_subject = header_subjects[j+1] # If no subject to the right, try to map by position elif j in header_subjects: matched_subject = header_subjects[j] # Third priority: try to map by position elif j in header_subjects: matched_subject = header_subjects[j] # Only add if we don't have a better teacher name for this subject yet if matched_subject and (matched_subject not in teacher_subject_map or 'Default Teacher for' in teacher_subject_map.get(matched_subject, '')): teacher_subject_map[matched_subject] = cell_str # If the cell contains multiple names (separated by newlines), process each separately elif '\n' in cell_str or '\\n' in cell_str: cell_parts = [part.strip() for part in cell_str.replace('\\n', '\n').split('\n') if part.strip()] for part in cell_parts: if self._is_likely_teacher_name(part): # Look for context on the left (department) and right (subject) left_context = "" right_context = "" # Get left neighbor (department) if j > 0 and j-1 < len(current_row): left_context = str(current_row[j-1]).strip() # Get right neighbor (subject) if j < len(current_row) - 1: right_context = str(current_row[j+1]).strip() # Try to determine the subject based on adjacency matched_subject = None # First priority: right neighbor if it matches a subject in the header if right_context and j+1 in header_subjects: matched_subject = header_subjects[j+1] # Second priority: use left context if it semantically relates to a teacher elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']): # If left context indicates a department, look for subject to the right of teacher if j+1 in header_subjects: matched_subject = header_subjects[j+1] # If no subject to the right, try to map by position elif j in header_subjects: matched_subject = header_subjects[j] # Third priority: try to map by position elif j in header_subjects: matched_subject = header_subjects[j] # Only add if we don't have a better teacher name for this subject yet if matched_subject and (matched_subject not in teacher_subject_map or 'Default Teacher for' in teacher_subject_map.get(matched_subject, '')): teacher_subject_map[matched_subject] = part # Additional validation: Remove any teacher-subject mappings that seem incorrect validated_teacher_subject_map = {} for subject, teacher in teacher_subject_map.items(): # Only add to validated map if teacher name passes all checks if self._is_likely_teacher_name(teacher): validated_teacher_subject_map[subject] = teacher else: print(f"Warning: Invalid teacher name '{teacher}' detected for subject '{subject}', skipping...") teacher_subject_map = validated_teacher_subject_map # Additional teacher-subject mapping: scan the data rows for teacher names paired with subjects # In many CSV files, teacher names appear in the same rows as subject data for i in range(header_idx + 1, min(len(rows), header_idx + 50)): # Check first 50 data rows current_row = rows[i] for j, cell_value in enumerate(current_row): cell_str = str(cell_value).strip() # If cell contains a likely teacher name and corresponds to a subject column if self._is_likely_teacher_name(cell_str) and j in header_subjects: subject_name = header_subjects[j] # Only add if we don't have a better teacher name for this subject yet if (subject_name not in teacher_subject_map or 'Default Teacher for' in teacher_subject_map.get(subject_name, '')): teacher_subject_map[subject_name] = cell_str # Process each student row for student_row in rows[header_idx + 1:]: # Determine the structure dynamically based on the header class_col_idx = None name_col_idx = None # Find the index of the class column (usually called "Класс") for idx, header in enumerate(header_row): if "Класс" in str(header) or "класс" in str(header) or "Class" in str(header) or "class" in str(header): class_col_idx = idx break # Find the index of the name column (usually called "ФИО") for idx, header in enumerate(header_row): if "ФИО" in str(header) or "ф.и.о." in str(header).lower() or "name" in str(header).lower(): name_col_idx = idx break # If we couldn't find the columns properly, skip this row if class_col_idx is None or name_col_idx is None: continue # Check if this row has valid data in the expected columns if (len(student_row) > max(class_col_idx, name_col_idx) and student_row[class_col_idx].strip() and # class name exists student_row[name_col_idx].strip() and # student name exists self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)): name = student_row[name_col_idx].strip() # Name column class_name = student_row[class_col_idx].strip() # Class column # Insert student into the database self.cursor.execute( "INSERT OR IGNORE INTO students (class_name, full_name) VALUES (?, ?)", (class_name, name) ) # Get the student_id for this student self.cursor.execute("SELECT student_id FROM students WHERE full_name = ? AND class_name = ?", (name, class_name)) student_id_result = self.cursor.fetchone() if student_id_result is None: continue student_id = student_id_result[0] # Process schedule data for this student