#!/usr/bin/env python
"""
combine_thesis_html.py - Combines all HTML files in the Thesis materials
directory into the main thesis document.

BeautifulSoup4 is used when it is installed; otherwise a basic regex-based
combination is performed so the script still works without it.
"""

import html
import os
import re

try:
    from bs4 import BeautifulSoup
except ImportError:
    # Optional dependency: a module-level ImportError would abort the script
    # before any fallback could run, so remember the absence instead.
    BeautifulSoup = None

# Default locations used when the script is run without arguments.
THESIS_DIR = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"
MAIN_FILE = "Thesis_ Intelligent School Schedule Management System.html"

# Inline styles shared by both code paths so their output looks the same.
SEPARATOR_STYLE = 'margin: 40px 0; border: 2px solid #4a6fa5;'
APPENDIX_HEADING_STYLE = (
    'color: #2c3e50; margin-top: 40px; '
    'border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;'
)
SECTION_STYLE = (
    'margin: 30px 0; padding: 20px; border: 1px solid #ddd; '
    'border-radius: 8px; background-color: #fafafa;'
)
SECTION_HEADING_STYLE = 'color: #4a6fa5; margin-top: 0;'

# Patterns for the regex fallback. ``\b`` keeps ``<head`` from matching
# ``<header>`` (and likewise for the other tags).
_DOCTYPE_RE = re.compile(r'<!DOCTYPE[^>]*>', re.IGNORECASE)
_HTML_TAG_RE = re.compile(r'<html\b[^>]*>|</html\s*>', re.IGNORECASE)
_HEAD_RE = re.compile(r'<head\b[^>]*>.*?</head\s*>', re.DOTALL | re.IGNORECASE)
_BODY_TAG_RE = re.compile(r'<body\b[^>]*>|</body\s*>', re.IGNORECASE)
_BODY_CLOSE_RE = re.compile(r'</body\s*>', re.IGNORECASE)
_HTML_CLOSE_RE = re.compile(r'</html\s*>', re.IGNORECASE)


def _last_match_start(pattern, text):
    """Return the start offset of the last match of *pattern*, or -1."""
    position = -1
    for match in pattern.finditer(text):
        position = match.start()
    return position


def _find_insert_position(main_content):
    """Return the offset in *main_content* where extra content is inserted.

    Preference order: just before the last ``</body>``, else just before the
    last ``</html>``, else the end of the document.
    """
    for pattern in (_BODY_CLOSE_RE, _HTML_CLOSE_RE):
        position = _last_match_start(pattern, main_content)
        if position != -1:
            return position
    return len(main_content)


def _strip_document_wrapper(content):
    """Remove doctype, html/body tags and the head section from *content*."""
    content = _DOCTYPE_RE.sub('', content)
    content = _HTML_TAG_RE.sub('', content)
    content = _HEAD_RE.sub('', content)
    return _BODY_TAG_RE.sub('', content)


def _combine_with_bs4(thesis_dir, main_file, html_files):
    """Append the other files' body elements to the main file via bs4."""
    main_file_path = os.path.join(thesis_dir, main_file)
    with open(main_file_path, 'r', encoding='utf-8') as f:
        soup_main = BeautifulSoup(f.read(), 'html.parser')

    main_body = soup_main.find('body')
    if main_body is None:
        # No body tag in the main document: create one.
        main_body = soup_main.new_tag('body')
        if soup_main.html:
            soup_main.html.insert(0, main_body)
        else:
            soup_main.insert(0, main_body)

    # Visual separator and heading that introduce the appended material.
    separator = soup_main.new_tag('hr')
    separator['style'] = SEPARATOR_STYLE
    main_body.append(separator)

    appendix_heading = soup_main.new_tag('h2')
    appendix_heading.string = 'Additional Thesis Materials'
    appendix_heading['style'] = APPENDIX_HEADING_STYLE
    main_body.append(appendix_heading)

    for filename in html_files:
        if filename == main_file:
            continue

        print(f"Processing {filename}...")
        with open(os.path.join(thesis_dir, filename), 'r', encoding='utf-8') as f:
            soup_additional = BeautifulSoup(f.read(), 'html.parser')

        section_div = soup_main.new_tag('div')
        section_div['class'] = 'additional-section'
        section_div['style'] = SECTION_STYLE

        section_heading = soup_main.new_tag('h3')
        section_heading.string = f'Content from: {filename}'
        section_heading['style'] = SECTION_HEADING_STYLE
        section_div.append(section_heading)

        additional_body = soup_additional.find('body')
        if additional_body:
            # Iterate over a snapshot: extracting a child mutates the live
            # child list, which would otherwise skip adjacent elements.
            for child in list(additional_body.children):
                if child.name:  # Only copy actual elements, not text nodes
                    section_div.append(child.extract())
        else:
            # No body tag: add the whole parsed content.
            section_div.append(soup_additional)

        main_body.append(section_div)

    with open(main_file_path, 'w', encoding='utf-8') as f:
        f.write(soup_main.prettify())


def _combine_without_bs4(thesis_dir, main_file, html_files):
    """Append the other files' body markup to the main file using regexes."""
    main_file_path = os.path.join(thesis_dir, main_file)
    with open(main_file_path, 'r', encoding='utf-8') as f:
        main_content = f.read()

    insert_pos = _find_insert_position(main_content)

    parts = [
        '\n\n'
        f'<hr style="{SEPARATOR_STYLE}">\n'
        f'<h2 style="{APPENDIX_HEADING_STYLE}">Additional Thesis Materials</h2>\n\n'
    ]

    for filename in html_files:
        if filename == main_file:
            continue

        print(f"Processing {filename}...")
        with open(os.path.join(thesis_dir, filename), 'r', encoding='utf-8') as f:
            content = _strip_document_wrapper(f.read())

        # The filename is interpolated into markup, so escape it.
        parts.append(
            f'\n<div class="additional-section" style="{SECTION_STYLE}">\n'
            f'<h3 style="{SECTION_HEADING_STYLE}">'
            f'Content from: {html.escape(filename)}</h3>\n'
            f'{content}\n'
            '</div>\n'
        )

    combined_content = (
        main_content[:insert_pos] + ''.join(parts) + main_content[insert_pos:]
    )
    with open(main_file_path, 'w', encoding='utf-8') as f:
        f.write(combined_content)


def combine_html_files(thesis_dir: str = THESIS_DIR, main_file: str = MAIN_FILE) -> str:
    """Append the content of every other ``.html`` file to the main file.

    Each additional file is wrapped in its own styled section, preceded by a
    heading naming the source file. The main file is rewritten in place, so
    running the function twice appends the materials twice.

    Args:
        thesis_dir: Directory that holds the HTML files.
        main_file: Name (within *thesis_dir*) of the document to extend.

    Returns:
        The full path of the rewritten main file.

    Raises:
        OSError: If the directory or one of the files cannot be read/written.
    """
    main_file_path = os.path.join(thesis_dir, main_file)

    # Sorted so the order of appended sections does not depend on the
    # filesystem's listing order.
    html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

    print(f"Found {len(html_files)} HTML files:")
    for i, f in enumerate(html_files, 1):
        print(f"  {i}. {f}")

    if BeautifulSoup is not None:
        _combine_with_bs4(thesis_dir, main_file, html_files)
    else:
        print("BeautifulSoup4 library is required for this script.")
        print("Install it with: pip install beautifulsoup4")
        print("Creating a basic combination without BeautifulSoup...")
        _combine_without_bs4(thesis_dir, main_file, html_files)

    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")
    return main_file_path


if __name__ == "__main__":
    combine_html_files()