#!/usr/bin/env python
"""
consolidate_csv.py - Consolidates all CSV files in sample_data directory
into a single CSV with sheet identifiers to distinguish between different
original files
"""
import csv
import os


def _find_student_csvs(sample_data_dir):
    """Return the student-data CSV filenames in *sample_data_dir*.

    Filters out generated sheet files and schedule templates (any filename
    containing 'first_sheet', 'last_sheet', or 'template'). Prints a
    diagnostic and returns None when the directory is missing or contains
    no matching files; otherwise prints the file listing and returns the
    sorted filenames (sorted so sheet numbers are deterministic across runs).
    """
    if not os.path.exists(sample_data_dir):
        print(f"Directory '{sample_data_dir}' not found.")
        return None

    excluded = ("first_sheet", "last_sheet", "template")
    csv_files = sorted(
        f
        for f in os.listdir(sample_data_dir)
        if f.endswith(".csv") and not any(token in f for token in excluded)
    )

    if not csv_files:
        print(f"No student data CSV files found in '{sample_data_dir}' directory.")
        return None

    print(f"Found {len(csv_files)} student data CSV file(s):")
    for i, filename in enumerate(csv_files, 1):
        print(f"  {i}. {filename}")
    return csv_files


def consolidate_csv_files():
    """Consolidate student CSVs into 'consolidated_data.csv' with a header.

    Each data row is prefixed with the 1-based sheet number and the source
    filename. The header row is sized to match the widest data row
    (previously a fixed 3-column header was written regardless of how many
    data columns followed, so the header never matched the data width).
    """
    sample_data_dir = "sample_data"
    output_file = "consolidated_data.csv"

    csv_files = _find_student_csvs(sample_data_dir)
    if csv_files is None:
        return

    consolidated_rows = []
    for sheet_num, filename in enumerate(csv_files, 1):
        csv_path = os.path.join(sample_data_dir, filename)
        print(f"Processing {csv_path}...")
        with open(csv_path, "r", encoding="utf-8") as file:
            for row in csv.reader(file):
                # Tag every row with its originating sheet number and file.
                consolidated_rows.append([sheet_num, filename] + row)

    # Size the header to the widest data row so column counts line up.
    max_data_cols = max((len(r) - 2 for r in consolidated_rows), default=0)
    header = ["Sheet_Number", "Original_File"] + [
        f"Data_Column_{i}" for i in range(1, max_data_cols + 1)
    ]

    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(consolidated_rows)

    print(f"Consolidated data written to {output_file}")
    print(f"Total rows in consolidated file: {len(consolidated_rows)}")


def consolidate_csv_files_simple():
    """
    Creates a simpler consolidated CSV file with Sheet_Number and
    Original_File columns.

    Streams rows straight into 'consolidated_data_simple.csv' (no header),
    prefixing each with the 1-based sheet number and the source filename.
    """
    sample_data_dir = "sample_data"
    output_file = "consolidated_data_simple.csv"

    csv_files = _find_student_csvs(sample_data_dir)
    if csv_files is None:
        return

    with open(output_file, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for sheet_num, filename in enumerate(csv_files, 1):
            csv_path = os.path.join(sample_data_dir, filename)
            print(f"Processing {csv_path}...")
            with open(csv_path, "r", encoding="utf-8") as infile:
                for row in csv.reader(infile):
                    # Add sheet number and filename as the first two columns.
                    writer.writerow([sheet_num, filename] + row)

    print(f"Simple consolidated data written to {output_file}")


if __name__ == "__main__":
    print("Creating consolidated CSV with sheet identifiers...")
    consolidate_csv_files_simple()
    print("Done! You can now upload the consolidated_data_simple.csv file for AI/ML analysis.")