#!/usr/bin/env python
"""
combine_thesis_html.py - Combines all HTML files in the Thesis materials directory
into the main thesis document
"""
import os
import re

# Guard the import so the fallback path in __main__ can still run when
# beautifulsoup4 is not installed.
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None


def combine_html_files():
    # Directory containing the HTML files
    thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"
    # Main file to append content to
    main_file = "Thesis_ Intelligent School Schedule Management System.html"
    main_file_path = os.path.join(thesis_dir, main_file)

    # Get all HTML files in the directory
    html_files = [f for f in os.listdir(thesis_dir) if f.endswith('.html')]
    print(f"Found {len(html_files)} HTML files:")
    for i, f in enumerate(html_files, 1):
        print(f" {i}. {f}")

    # Read the main file content
    with open(main_file_path, 'r', encoding='utf-8') as f:
        main_content = f.read()

    # Parse the main file with BeautifulSoup
    soup_main = BeautifulSoup(main_content, 'html.parser')

    # Find the body element in the main file
    main_body = soup_main.find('body')
    if not main_body:
        # If no body tag, create one and attach it to the document
        main_body = soup_main.new_tag('body')
        if soup_main.html:
            soup_main.html.insert(0, main_body)
        else:
            soup_main.insert(0, main_body)

    # Add a separator before adding new content
    separator = soup_main.new_tag('hr')
    separator['style'] = 'margin: 40px 0; border: 2px solid #4a6fa5;'
    main_body.append(separator)

    # Add a heading for the appended content
    appendix_heading = soup_main.new_tag('h2')
    appendix_heading.string = 'Additional Thesis Materials'
    appendix_heading['style'] = 'color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;'
    main_body.append(appendix_heading)

    # Process each additional HTML file
    for filename in html_files:
        if filename == main_file:  # Skip the main file
            continue

        print(f"Processing {filename}...")
        file_path = os.path.join(thesis_dir, filename)

        # Read the additional file content
        with open(file_path, 'r', encoding='utf-8') as f:
            additional_content = f.read()

        # Parse the additional file
        soup_additional = BeautifulSoup(additional_content, 'html.parser')

        # Create a section for this file
        section_div = soup_main.new_tag('div')
        section_div['class'] = 'additional-section'
        section_div['style'] = 'margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;'

        # Add a heading for this section
        section_heading = soup_main.new_tag('h3')
        section_heading.string = f'Content from: {filename}'
        section_heading['style'] = 'color: #4a6fa5; margin-top: 0;'
        section_div.append(section_heading)

        # Get body content from the additional file
        additional_body = soup_additional.find('body')
        if additional_body:
            # Copy child elements from the additional body into our section.
            # Materialise the children first: extract() mutates the tree, and
            # extracting while iterating the live generator skips elements.
            for child in list(additional_body.children):
                if child.name:  # Only copy actual elements, not text nodes
                    section_div.append(child.extract())
        else:
            # If no body tag, add the whole parsed document
            section_div.append(soup_additional)

        # Append the section to the main body
        main_body.append(section_div)

    # Write the combined content back to the main file
    with open(main_file_path, 'w', encoding='utf-8') as f:
        f.write(soup_main.prettify())  # prettify() already returns a string

    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")
if __name__ == "__main__":
# Check if BeautifulSoup is available
try:
import bs4
combine_html_files()
except ImportError:
print("BeautifulSoup4 library is required for this script.")
print("Install it with: pip install beautifulsoup4")
# Create a simple version without BeautifulSoup
print("Creating a basic combination without BeautifulSoup...")
        thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"
        main_file = "Thesis_ Intelligent School Schedule Management System.html"
        main_file_path = os.path.join(thesis_dir, main_file)

        # Get all HTML files in the directory
        html_files = [f for f in os.listdir(thesis_dir) if f.endswith('.html')]

        # Read the main file content
        with open(main_file_path, 'r', encoding='utf-8') as f:
            main_content = f.read()

        # Find the closing body tag to insert additional content
        body_close_pos = main_content.rfind('