Added
This commit is contained in:
220
scheduler_bots/database_fixed.py
Normal file
220
scheduler_bots/database_fixed.py
Normal file
@@ -0,0 +1,220 @@
|
||||
self.conn.commit()
|
||||
return True
|
||||
|
||||
def process_csv_with_teacher_mapping(self, csv_file):
|
||||
"""Process CSV with teacher-subject mapping based on positional order"""
|
||||
if not os.path.exists(csv_file):
|
||||
return False
|
||||
|
||||
with open(csv_file, 'r', encoding='utf-8') as file:
|
||||
reader = csv.reader(file)
|
||||
rows = list(reader)
|
||||
|
||||
# Identify header row - look for the row containing "ФИО" (full name) or similar indicators
|
||||
header_idx = None
|
||||
for i, row in enumerate(rows):
|
||||
for cell in row:
|
||||
if "ФИО" in str(cell) or "фио" in str(cell).lower() or "Ф.И.О." in str(cell) or "ф.и.о." in str(cell):
|
||||
header_idx = i
|
||||
break
|
||||
if header_idx is not None:
|
||||
break
|
||||
|
||||
if header_idx is None:
|
||||
# Check if this file contains class and name columns that identify it as a student data file
|
||||
# Even if the header doesn't contain ФИО, we might still be able to identify student data
|
||||
has_class_indicators = any(
|
||||
any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class'])
|
||||
for row in rows[:min(len(rows), 10)] # Check first 10 rows
|
||||
)
|
||||
|
||||
has_name_indicators = any(
|
||||
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname'])
|
||||
for row in rows[:min(len(rows), 10)] # Check first 10 rows
|
||||
)
|
||||
|
||||
if has_class_indicators and has_name_indicators:
|
||||
# Try to find the header row by looking for class and name indicators
|
||||
for i, row in enumerate(rows):
|
||||
if any(indicator in str(cell).lower() for cell in row for indicator in ['класс', 'class']) and \
|
||||
any(indicator in str(cell).lower() for cell in row for indicator in ['имя', 'name', 'фамилия', 'surname']):
|
||||
header_idx = i
|
||||
break
|
||||
|
||||
if header_idx is None:
|
||||
print(f"Skipping {csv_file} - does not appear to be student data with ФИО/class columns")
|
||||
return False
|
||||
|
||||
# Find teacher-subject mappings in the first 0-15 rows before the header
|
||||
teacher_subject_map = {}
|
||||
|
||||
# Build a mapping of subject names in the header row
|
||||
header_row = rows[header_idx]
|
||||
header_subjects = {}
|
||||
for col_idx, subject_name in enumerate(header_row):
|
||||
subject_name = str(subject_name).strip()
|
||||
if (subject_name and
|
||||
subject_name.lower() not in ['ф.и.о.', 'фио', 'класс', 'номер', 'сортировка', 'шкафчика', 'локера'] and
|
||||
subject_name.strip() != "" and
|
||||
"ф.и.о" not in subject_name.lower() and
|
||||
"сортировка" not in subject_name.lower() and
|
||||
"номер" not in subject_name.lower() and
|
||||
"№" not in subject_name):
|
||||
header_subjects[col_idx] = subject_name # Map column index to subject name
|
||||
|
||||
# Process rows before the header to find teacher names and map them to subjects
|
||||
for i in range(min(15, header_idx)): # Check first 15 rows before header
|
||||
current_row = rows[i]
|
||||
|
||||
# Process all cells in the row to find teacher names and their adjacent context
|
||||
for j, cell_value in enumerate(current_row):
|
||||
cell_str = str(cell_value).strip()
|
||||
|
||||
# Check if this cell is a likely teacher name
|
||||
if self._is_likely_teacher_name(cell_str):
|
||||
# Look for context on the left (department) and right (subject)
|
||||
left_context = ""
|
||||
right_context = ""
|
||||
|
||||
# Get left neighbor (department)
|
||||
if j > 0 and j-1 < len(current_row):
|
||||
left_context = str(current_row[j-1]).strip()
|
||||
|
||||
# Get right neighbor (subject)
|
||||
if j < len(current_row) - 1:
|
||||
right_context = str(current_row[j+1]).strip()
|
||||
|
||||
# Try to determine the subject based on adjacency
|
||||
matched_subject = None
|
||||
|
||||
# First priority: right neighbor if it matches a subject in the header
|
||||
if right_context and j+1 in header_subjects:
|
||||
matched_subject = header_subjects[j+1]
|
||||
# Second priority: use left context if it semantically relates to a teacher
|
||||
elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']):
|
||||
# If left context indicates a department, look for subject to the right of teacher
|
||||
if j+1 in header_subjects:
|
||||
matched_subject = header_subjects[j+1]
|
||||
# If no subject to the right, try to map by position
|
||||
elif j in header_subjects:
|
||||
matched_subject = header_subjects[j]
|
||||
# Third priority: try to map by position
|
||||
elif j in header_subjects:
|
||||
matched_subject = header_subjects[j]
|
||||
|
||||
# Only add if we don't have a better teacher name for this subject yet
|
||||
if matched_subject and (matched_subject not in teacher_subject_map or
|
||||
'Default Teacher for' in teacher_subject_map.get(matched_subject, '')):
|
||||
teacher_subject_map[matched_subject] = cell_str
|
||||
|
||||
# If the cell contains multiple names (separated by newlines), process each separately
|
||||
elif '\n' in cell_str or '\\n' in cell_str:
|
||||
cell_parts = [part.strip() for part in cell_str.replace('\\n', '\n').split('\n') if part.strip()]
|
||||
for part in cell_parts:
|
||||
if self._is_likely_teacher_name(part):
|
||||
# Look for context on the left (department) and right (subject)
|
||||
left_context = ""
|
||||
right_context = ""
|
||||
|
||||
# Get left neighbor (department)
|
||||
if j > 0 and j-1 < len(current_row):
|
||||
left_context = str(current_row[j-1]).strip()
|
||||
|
||||
# Get right neighbor (subject)
|
||||
if j < len(current_row) - 1:
|
||||
right_context = str(current_row[j+1]).strip()
|
||||
|
||||
# Try to determine the subject based on adjacency
|
||||
matched_subject = None
|
||||
|
||||
# First priority: right neighbor if it matches a subject in the header
|
||||
if right_context and j+1 in header_subjects:
|
||||
matched_subject = header_subjects[j+1]
|
||||
# Second priority: use left context if it semantically relates to a teacher
|
||||
elif left_context and any(keyword in left_context.lower() for keyword in ['учитель', 'teacher', 'кафедра', 'department']):
|
||||
# If left context indicates a department, look for subject to the right of teacher
|
||||
if j+1 in header_subjects:
|
||||
matched_subject = header_subjects[j+1]
|
||||
# If no subject to the right, try to map by position
|
||||
elif j in header_subjects:
|
||||
matched_subject = header_subjects[j]
|
||||
# Third priority: try to map by position
|
||||
elif j in header_subjects:
|
||||
matched_subject = header_subjects[j]
|
||||
|
||||
# Only add if we don't have a better teacher name for this subject yet
|
||||
if matched_subject and (matched_subject not in teacher_subject_map or
|
||||
'Default Teacher for' in teacher_subject_map.get(matched_subject, '')):
|
||||
teacher_subject_map[matched_subject] = part
|
||||
|
||||
# Additional validation: Remove any teacher-subject mappings that seem incorrect
|
||||
validated_teacher_subject_map = {}
|
||||
for subject, teacher in teacher_subject_map.items():
|
||||
# Only add to validated map if teacher name passes all checks
|
||||
if self._is_likely_teacher_name(teacher):
|
||||
validated_teacher_subject_map[subject] = teacher
|
||||
else:
|
||||
print(f"Warning: Invalid teacher name '{teacher}' detected for subject '{subject}', skipping...")
|
||||
|
||||
teacher_subject_map = validated_teacher_subject_map
|
||||
|
||||
# Additional teacher-subject mapping: scan the data rows for teacher names paired with subjects
|
||||
# In many CSV files, teacher names appear in the same rows as subject data
|
||||
for i in range(header_idx + 1, min(len(rows), header_idx + 50)): # Check first 50 data rows
|
||||
current_row = rows[i]
|
||||
for j, cell_value in enumerate(current_row):
|
||||
cell_str = str(cell_value).strip()
|
||||
|
||||
# If cell contains a likely teacher name and corresponds to a subject column
|
||||
if self._is_likely_teacher_name(cell_str) and j in header_subjects:
|
||||
subject_name = header_subjects[j]
|
||||
# Only add if we don't have a better teacher name for this subject yet
|
||||
if (subject_name not in teacher_subject_map or
|
||||
'Default Teacher for' in teacher_subject_map.get(subject_name, '')):
|
||||
teacher_subject_map[subject_name] = cell_str
|
||||
|
||||
# Process each student row
|
||||
for student_row in rows[header_idx + 1:]:
|
||||
# Determine the structure dynamically based on the header
|
||||
class_col_idx = None
|
||||
name_col_idx = None
|
||||
|
||||
# Find the index of the class column (usually called "Класс")
|
||||
for idx, header in enumerate(header_row):
|
||||
if "Класс" in str(header) or "класс" in str(header) or "Class" in str(header) or "class" in str(header):
|
||||
class_col_idx = idx
|
||||
break
|
||||
|
||||
# Find the index of the name column (usually called "ФИО")
|
||||
for idx, header in enumerate(header_row):
|
||||
if "ФИО" in str(header) or "ф.и.о." in str(header).lower() or "name" in str(header).lower():
|
||||
name_col_idx = idx
|
||||
break
|
||||
|
||||
# If we couldn't find the columns properly, skip this row
|
||||
if class_col_idx is None or name_col_idx is None:
|
||||
continue
|
||||
|
||||
# Check if this row has valid data in the expected columns
|
||||
if (len(student_row) > max(class_col_idx, name_col_idx) and
|
||||
student_row[class_col_idx].strip() and # class name exists
|
||||
student_row[name_col_idx].strip() and # student name exists
|
||||
self._is_valid_student_record_by_cols(student_row, class_col_idx, name_col_idx)):
|
||||
|
||||
name = student_row[name_col_idx].strip() # Name column
|
||||
class_name = student_row[class_col_idx].strip() # Class column
|
||||
|
||||
# Insert student into the database
|
||||
self.cursor.execute(
|
||||
"INSERT OR IGNORE INTO students (class_name, full_name) VALUES (?, ?)",
|
||||
(class_name, name)
|
||||
)
|
||||
|
||||
# Get the student_id for this student
|
||||
self.cursor.execute("SELECT student_id FROM students WHERE full_name = ? AND class_name = ?", (name, class_name))
|
||||
student_id_result = self.cursor.fetchone()
|
||||
if student_id_result is None:
|
||||
continue
|
||||
student_id = student_id_result[0]
|
||||
|
||||
# Process schedule data for this student
|
||||
Reference in New Issue
Block a user