2026-02-05 10:15:09 +03:00
parent 2427fce842
commit 67241a5ed0
33 changed files with 13147 additions and 154 deletions

@@ -0,0 +1,117 @@
#!/usr/bin/env python
"""
consolidate_csv.py - Consolidates all CSV files in the sample_data directory into a
single CSV, adding sheet identifiers to distinguish the original files.
"""
import csv
import os


def consolidate_csv_files():
sample_data_dir = "sample_data"
output_file = "consolidated_data.csv"
if not os.path.exists(sample_data_dir):
print(f"Directory '{sample_data_dir}' not found.")
return
    # Keep only the actual student distribution files; skip the schedule
    # template and the per-sheet export files
    all_csv_files = [f for f in os.listdir(sample_data_dir) if f.endswith('.csv')]
    excluded_markers = ('first_sheet', 'last_sheet', 'template')
    csv_files = [f for f in all_csv_files
                 if not any(marker in f for marker in excluded_markers)]
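    # For instance (hypothetical names), "schedule_template.csv" and
    # "week1_first_sheet.csv" would be skipped, while
    # "student_distribution.csv" would be kept.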
if not csv_files:
print(f"No student data CSV files found in '{sample_data_dir}' directory.")
return
print(f"Found {len(csv_files)} student data CSV file(s):")
for i, filename in enumerate(csv_files, 1):
print(f" {i}. {filename}")
consolidated_rows = []
for sheet_num, filename in enumerate(csv_files, 1):
csv_path = os.path.join(sample_data_dir, filename)
print(f"Processing {csv_path}...")
with open(csv_path, 'r', encoding='utf-8') as file:
reader = csv.reader(file)
rows = list(reader)
            # Prefix each row with the sheet number and the source filename
            for row in rows:
                consolidated_rows.append([sheet_num, filename] + row)
    # Write the consolidated data to a new CSV file
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Header for the two identifier columns; the original columns vary
        # per source file, so they are left unlabeled here
        writer.writerow(['Sheet_Number', 'Original_File'])
        writer.writerows(consolidated_rows)
print(f"Consolidated data written to {output_file}")
print(f"Total rows in consolidated file: {len(consolidated_rows)}")


def consolidate_csv_files_simple():
    """
    Streams a simpler consolidated CSV: each row gets the sheet number and
    original filename as its first two columns; no header row is written.
    """
sample_data_dir = "sample_data"
output_file = "consolidated_data_simple.csv"
if not os.path.exists(sample_data_dir):
print(f"Directory '{sample_data_dir}' not found.")
return
    # Keep only the actual student distribution files, same filter as above
    all_csv_files = [f for f in os.listdir(sample_data_dir) if f.endswith('.csv')]
    excluded_markers = ('first_sheet', 'last_sheet', 'template')
    csv_files = [f for f in all_csv_files
                 if not any(marker in f for marker in excluded_markers)]
if not csv_files:
print(f"No student data CSV files found in '{sample_data_dir}' directory.")
return
print(f"Found {len(csv_files)} student data CSV file(s):")
for i, filename in enumerate(csv_files, 1):
print(f" {i}. {filename}")
with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
writer = csv.writer(outfile)
# Process each file
for sheet_num, filename in enumerate(csv_files, 1):
csv_path = os.path.join(sample_data_dir, filename)
print(f"Processing {csv_path}...")
with open(csv_path, 'r', encoding='utf-8') as infile:
reader = csv.reader(infile)
for row in reader:
# Add sheet number and filename as first two columns
new_row = [sheet_num, filename] + row
writer.writerow(new_row)
print(f"Simple consolidated data written to {output_file}")


if __name__ == "__main__":
print("Creating consolidated CSV with sheet identifiers...")
consolidate_csv_files_simple()
print("Done! You can now upload the consolidated_data_simple.csv file for AI/ML analysis.")