Added scheduler_bots/consolidate_csv.py (new file, 117 lines)
@@ -0,0 +1,117 @@
#!/usr/bin/env python
"""
consolidate_csv.py - Consolidates all CSV files in the sample_data directory into a single CSV,
with sheet identifiers to distinguish between the different original files.
"""

import csv
import os


def consolidate_csv_files():
    sample_data_dir = "sample_data"
    output_file = "consolidated_data.csv"

    if not os.path.exists(sample_data_dir):
        print(f"Directory '{sample_data_dir}' not found.")
        return

    # Get all CSV files and filter out the schedule template and sheet files
    all_csv_files = [f for f in os.listdir(sample_data_dir) if f.endswith('.csv')]

    # Keep only the actual student distribution files (not the sheets)
    csv_files = []
    for filename in all_csv_files:
        if 'first_sheet' not in filename and 'last_sheet' not in filename and 'template' not in filename:
            csv_files.append(filename)

    if not csv_files:
        print(f"No student data CSV files found in '{sample_data_dir}' directory.")
        return

    print(f"Found {len(csv_files)} student data CSV file(s):")
    for i, filename in enumerate(csv_files, 1):
        print(f"  {i}. {filename}")

    consolidated_rows = []

    for sheet_num, filename in enumerate(csv_files, 1):
        csv_path = os.path.join(sample_data_dir, filename)

        print(f"Processing {csv_path}...")

        with open(csv_path, 'r', encoding='utf-8') as file:
            reader = csv.reader(file)
            rows = list(reader)

        # Mark each row with the sheet number and original filename it came from
        for row in rows:
            new_row = [sheet_num, filename] + row
            consolidated_rows.append(new_row)

    # Write consolidated data to a new CSV file
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Write header: the first two columns identify the source; the remaining
        # columns carry the original row data and vary in number per source file
        writer.writerow(['Sheet_Number', 'Original_File', 'Data_Columns'])
        # Write all rows
        for row in consolidated_rows:
            writer.writerow(row)

    print(f"Consolidated data written to {output_file}")
    print(f"Total rows in consolidated file: {len(consolidated_rows)}")


def consolidate_csv_files_simple():
    """
    Creates a simpler consolidated CSV file with Sheet_Number and Original_File columns.
    """
    sample_data_dir = "sample_data"
    output_file = "consolidated_data_simple.csv"

    if not os.path.exists(sample_data_dir):
        print(f"Directory '{sample_data_dir}' not found.")
        return

    # Get all CSV files
    all_csv_files = [f for f in os.listdir(sample_data_dir) if f.endswith('.csv')]

    # Keep only the actual student distribution files (not the sheets)
    csv_files = []
    for filename in all_csv_files:
        if 'first_sheet' not in filename and 'last_sheet' not in filename and 'template' not in filename:
            csv_files.append(filename)

    if not csv_files:
        print(f"No student data CSV files found in '{sample_data_dir}' directory.")
        return

    print(f"Found {len(csv_files)} student data CSV file(s):")
    for i, filename in enumerate(csv_files, 1):
        print(f"  {i}. {filename}")

    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)

        # Process each file
        for sheet_num, filename in enumerate(csv_files, 1):
            csv_path = os.path.join(sample_data_dir, filename)

            print(f"Processing {csv_path}...")

            with open(csv_path, 'r', encoding='utf-8') as infile:
                reader = csv.reader(infile)

                for row in reader:
                    # Add sheet number and filename as the first two columns
                    new_row = [sheet_num, filename] + row
                    writer.writerow(new_row)

    print(f"Simple consolidated data written to {output_file}")


if __name__ == "__main__":
    print("Creating consolidated CSV with sheet identifiers...")
    consolidate_csv_files_simple()
    print("Done! You can now upload the consolidated_data_simple.csv file for AI/ML analysis.")
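
For reference, the consolidated output can be sanity-checked by reading it back and counting rows per source file. The following is a minimal sketch, not part of this commit: it uses only the standard library, and it assumes the column layout that consolidate_csv_files_simple() writes (sheet number first, original filename second, then the original columns).

# check_consolidated.py - hypothetical helper, not included in this commit.
# Reads consolidated_data_simple.csv back and counts rows per original file.
import csv
from collections import Counter

counts = Counter()
with open("consolidated_data_simple.csv", newline="", encoding="utf-8") as f:
    for row in csv.reader(f):
        if row:
            counts[row[1]] += 1  # column 1 holds the original filename

for original_file, n in sorted(counts.items()):
    print(f"{original_file}: {n} row(s)")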